if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
if (!require("remotes", quietly = TRUE))
install.packages("remotes")
if (!require("pacman", quietly = TRUE))
install.packages("pacman")
if (!require("gtsummary", quietly = TRUE))
remotes::install_github("ddsjoberg/gtsummary")
if (!require("mixOmics", quietly = TRUE))
BiocManager::install("mixOmicsTeam/mixOmics")
if (!require("yingtools2", quietly = TRUE))
remotes::install_github("ying14/yingtools2")
if (!require("ggpirate", quietly = TRUE))
remotes::install_github("mikabr/ggpirate")
if (!require("paletteer", quietly = TRUE))
remotes::install_github("EmilHvitfeldt/paletteer")
if (!require("microbiomeMarker", quietly = TRUE))
BiocManager::install("microbiomeMarker")
if (!require("magick", quietly = TRUE))
install.packages("magick")
if (!require("mltest", quietly = TRUE))
install.packages("mltest")
library(pacman)
p_load(
factoextra, # Dimension reduction
FactoMineR, # Dimension reduction
stabs, # Stability variable selection
mboost, # Gradient boosting for model building
ggrepel, # Visualization, repels labels on plots
bestglm, # Logistic regression
knitr, # To change R markdown PDF options
caret, # To calculate model paramters
cutpointr, # Calculate cutpoints
RPostgreSQL, # Connect to PostgreSQL server
phyloseq, # Phyloseq data wrangling
# microbiomeMarker, # LEfSe analysis
ComplexHeatmap, # Heatmap generator
paletteer, # Extensive color palette
tidyverse, # Data wrangling and visualization
circlize, # Color ramp builder
rstatix, # Tidyverse statistics
EnhancedVolcano, # Volcano plot analysis
umap, # Uniform Manifold Approximation Projection
ggpubr, # Simple ggplots with stats
ggpirate, # ggplot version of Pirate plot
mixOmics, # PLS-DA
conflicted, # Forces conflicted packages to be used by preferred package
gridExtra, # Arrange multiple grobs
reticulate, # Run python in R
tableone, # Additional clinical tables support
lubridate, # Manipulate date objects
yingtools2, # Custom plotting functions
cowplot, # Combine plots together
epiR, # Obtain model performance measures
pROC, # Produce ROC curves
plotrix, # Add table to base R plot
glmnet, # LASSO regression
forcats, # Factor reordering
ggpmisc, # Miscellaneous ggplot helper functions
doParallel, # Parallelize functions (for optimizing cutpoints)
PRISMAstatement, # Flow table generator
DiagrammeRsvg, # Flow table visualizer
rsvg, # Flow table visualizer
scales, # Scale functions for visualizations
devtools, # To load packages from GitHub
# datscience, # Bibliography manager
formatR, # Markdown code formatter
install = FALSE
)
library("gtsummary") # Clinical tables, pacman had trouble loading this in, so had to go the manual route
{conflict_prefer("select", "dplyr")
conflict_prefer("mutate", "dplyr")
conflict_prefer("filter", "dplyr")
conflict_prefer("rename", "dplyr")
conflict_prefer("slice", "dplyr")
conflict_prefer("between", "dplyr")
conflict_prefer("annotate", "ggplot2")}
devtools::source_url("https://github.com/yingeddi2008/DFIutility/blob/master/getRdpPal.R?raw=TRUE")
# ggplot theme shortcuts
et <- element_text
eb <- element_blank
er <- element_rect
el <- element_line
opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE)
`%!in%` <- negate(`%in%`)
# Running python in R
## User must set path to their python library
reticulate::use_python("/usr/bin/python3")
# Customized lefse function
source("./Data/run_lefse2.R")# 1) Load R image load('./Data/LT_Modeling.RData')
# OR #
# 2) Individually Load R Objects
# Sample lookup
sample_lookup <- readRDS("./Data/sample_lookup.rds")
# First samples from all patients
first_samps <- readRDS("./Data/first_samps_anon.rds")
# Simplified dataframe containing infection information and
# relative abundance of targeted taxa
peri_matrix_all <- readRDS("./Data/peri_matrix_clin_all.rds")
# Distinct detailed infection data
peri_criteria_best <- readRDS("./Data/peri_criteria_best_anon.rds")
# All detailed infection data
peri_criteria_all <- readRDS("./Data/peri_criteria_all_anon.rds")
# NCBI taxonomy lookup
tax_lookup <- readRDS("./Data/tax_lookup.rds")
# Complete metaphlan dataframe for all patients and healthy
# donors
metaphlan_df <- readRDS("./Data/metaphlan_anon.rds")
# Metaphlan dataframe of patient samples
metaphlan_peri_anon <- readRDS("./Data/metaphlan_peri_anon.rds")
# Custom color palette
metaphlan_pal <- getRdpPal(metaphlan_df)
# Qualitative metabolomics
metab_qual_anon <- readRDS("./Data/metab_qual_anonym.rds")
# Quantitative metabolomics
metab_quant_anon <- readRDS("./Data/metab_quant_anonym.rds")
# Antibiotics data
abx <- readRDS("./Data/abx_anon.rds")
# Demographics data
demo <- readRDS("./Data/demo_anon.rds")
# Bile acid gene data
ba_genes <- readRDS("./Data/bile_acid_genes.rds")# Custom function to avoid any errors during map
safe_cutpointr <- possibly(.f = cutpointr, otherwise = "Error")
# Calculate the number of cores
no_cores <- detectCores() - 2
# create the cluster for caret to use
cl <- makePSOCKcluster(no_cores)
registerDoParallel(cl)
set.seed(123456)
test_abundance <- peri_matrix_all %>%
mutate(sampleID = as.factor(sampleID)) %>%
select(starts_with(c("entero", "esch", "klebs", "citro",
"rahn", "proteus")), -ends_with("abs_abundance")) %>%
mutate_if(is.character, as.numeric) %>%
mutate(enterococcus_infection = ifelse(`Enterococcus faecium` +
`Enterococcus faecalis` + `Enterococcus avium` >= 1,
1, 0), enterobacterales_infection = ifelse(`Escherichia coli` +
`Klebsiella pneumoniae` + `Citrobacter freundii` + `Proteus mirabilis` >=
1, 1, 0)) %>%
select(-c(`Enterococcus faecium`, `Enterococcus faecalis`,
`Enterococcus avium`, `Escherichia coli`, `Klebsiella pneumoniae`,
`Citrobacter freundii`, `Proteus mirabilis`)) %>%
pivot_longer(-c(enterococcus_infection, enterobacterales_infection),
names_to = "variable", values_to = "value") %>%
pivot_longer(-c(variable, value), names_to = "infection_type",
values_to = "infection") %>%
mutate(variable = paste0("Input: ", variable, "\nPredict: ",
infection_type)) %>%
group_by(infection_type, variable) %>%
group_map(~safe_cutpointr(., value, infection, variable,
method = maximize_metric, metric = youden, pos_class = 1,
boot_runs = 100, allowParallel = TRUE, na.rm = T), .keep = TRUE)
# to get cutpoint object
test_abundance[4][[1]] # ecoc rel abd, ecoc infection## # A tibble: 1 × 18
## subgroup direction
## <chr> <chr>
## 1 "Input: enterococcus_rel_abundance\nPredict: enterococcus_infection" >=
## optimal_cutpoint method youden acc sensitivity specificity
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 0.199356 maximize_metric 0.577090 0.723214 0.882353 0.694737
## AUC pos_class neg_class prevalence outcome predictor grouping
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 0.829721 1 0 0.151786 infection value variable
## data roc_curve boot
## <list> <list> <list>
## 1 <tibble [112 × 2]> <rc_ctpnt [82 × 10]> <tibble [100 × 23]>
test_abundance[1][[1]] # ebac rel abd, ebacc infection## # A tibble: 1 × 18
## subgroup
## <chr>
## 1 "Input: enterobacterales_rel_abundance\nPredict: enterobacterales_infection"
## direction optimal_cutpoint method youden acc sensitivity
## <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 >= 0.0247691 maximize_metric 0.798077 0.8125 1
## specificity AUC pos_class neg_class prevalence outcome predictor
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 0.798077 0.905048 1 0 0.0714286 infection value
## grouping data roc_curve boot
## <chr> <list> <list> <list>
## 1 variable <tibble [112 × 2]> <rc_ctpnt [59 × 10]> <tibble [100 × 23]>
test_abundance_unnest <- test_abundance %>%
map_df(as_tibble)
# obtain threshold cutpoints for relative abundance
optimal_cutpoint_rel <- test_abundance_unnest %>%
separate(subgroup, into = c("Input", "Predict"), sep = "\n",
remove = F) %>%
filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
Predict)) %>%
group_by(pos_class) %>%
filter(grepl("rel", subgroup)) %>%
select(subgroup, optimal_cutpoint)# Expansions and Infections Stats
expan_infx_stats <- peri_matrix_all %>%
left_join(peri_matrix_all %>%
select(sampleID, ends_with("infection")) %>%
ungroup()) %>%
mutate(ecoc_infx = enterococcus_infection, ecoc_infx = ifelse(ecoc_infx >=
1, 1, 0), ebac_infx = enterobacterales_infection, ebac_infx = ifelse(ebac_infx >=
1, 1, 0)) %>%
select(enterococcus_rel_abundance, enterobacterales_rel_abundance,
ecoc_infx, ebac_infx) %>%
summarise(ecoc_expan_above_cutpoint = sum(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2]), ecoc_infx_ecoc_below_cutpoint = sum(ecoc_infx ==
1 & enterococcus_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[2],
na.rm = T), ecoc_infx_ecoc_above_cutpoint = sum(ecoc_infx ==
1 & enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
na.rm = T), ecoc_noinfx_ecoc_below_cutpoint = sum(ecoc_infx ==
0 & enterococcus_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[2],
na.rm = T), ecoc_noinfx_ecoc_above_cutpoint = sum(ecoc_infx ==
0 & enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2],
na.rm = T), ebac_expan_above_cutpoint = sum(enterobacterales_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[1]), ebac_infx_ebac_below_cutpoint = sum(ebac_infx ==
1 & enterobacterales_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[1],
na.rm = T), ebac_infx_ebac_above_cutpoint = sum(ebac_infx ==
1 & enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1],
na.rm = T), ebac_noinfx_ebac_below_cutpoint = sum(ebac_infx ==
0 & enterobacterales_rel_abundance < optimal_cutpoint_rel$optimal_cutpoint[1],
na.rm = T), ebac_noinfx_ebac_above_cutpoint = sum(ebac_infx ==
0 & enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1],
na.rm = T)) %>%
rownames_to_column(var = "rowname") %>%
pivot_longer(-rowname, names_to = "metric", values_to = "count") %>%
mutate(percent = round((count/107) * 100, 2)) %>%
select(-rowname)
ecoc_infx_confusion <- expan_infx_stats %>%
filter(metric %in% c("ecoc_infx_ecoc_below_cutpoint", "ecoc_infx_ecoc_above_cutpoint",
"ecoc_noinfx_ecoc_below_cutpoint", "ecoc_noinfx_ecoc_above_cutpoint")) %>%
select(metric, count) %>%
pivot_wider(names_from = metric, values_from = count)
ecoc_infx_confusion_cnfs <- data.frame(Enterococcus = c("Infection",
"No Infection"), Expansion = c(ecoc_infx_confusion$ecoc_infx_ecoc_above_cutpoint,
ecoc_infx_confusion$ecoc_noinfx_ecoc_above_cutpoint), `No Expansion` = c(ecoc_infx_confusion$ecoc_infx_ecoc_below_cutpoint,
ecoc_infx_confusion$ecoc_noinfx_ecoc_below_cutpoint))
ecoc_infx_confusion_cnfs <- ecoc_infx_confusion_cnfs %>%
mutate(sensitivity = round(ecoc_infx_confusion_cnfs[1, 2]/(ecoc_infx_confusion_cnfs[1,
2] + ecoc_infx_confusion_cnfs[1, 3]), 3), specificity = round(ecoc_infx_confusion_cnfs[2,
3]/(ecoc_infx_confusion_cnfs[2, 3] + ecoc_infx_confusion_cnfs[2,
2]), 3), odds_ratio = round((ecoc_infx_confusion_cnfs[1,
2]/ecoc_infx_confusion_cnfs[1, 3])/(ecoc_infx_confusion_cnfs[2,
2]/ecoc_infx_confusion_cnfs[2, 3]), 3))
fig_5b <- test_abundance_unnest %>%
separate(subgroup, into = c("Input", "Predict"), sep = "\n",
remove = F) %>%
filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
Predict)) %>%
group_by(pos_class) %>%
ungroup() %>%
unnest(roc_curve) %>%
arrange(desc(AUC)) %>%
select(-boot) %>%
mutate(auc_label = paste0("AUC = ", formatC(round(AUC, 3),
digits = 2, format = "f")), auc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
", ", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][2, ], 2)), digits = 2, format = "f"),
"]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
", ", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][2, ], 2)), digits = 2,
format = "f"), "]")), acc_label = paste0("Accuracy = ",
formatC(round(acc, 3) * 100, digits = 0, format = "f"),
"%"), acc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0, format = "f"),
"%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0, format = "f"),
"%"), "]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), sens_label = paste0("Sensitivity = ",
formatC(round(sensitivity, 3) * 100, digits = 0, format = "f"),
"%"), sens_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), spec_label = paste0("Specificity = ",
formatC(round(specificity, 3) * 100, digits = 0, format = "f"),
"%"), spec_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"))) %>%
filter(grepl(pattern = "rel_abundance", x = Input)) %>%
mutate(subgroup = str_to_title(subgroup), subgroup = gsub(pattern = "_",
replacement = " ", x = subgroup), subgroup = gsub(pattern = "rel abundance",
replacement = "Relative Abundance (%)", x = subgroup),
subgroup2 = ifelse(grepl(x = subgroup, pattern = "enterococcus",
ignore.case = TRUE), "Enterococcus", "Enterobacterales"),
subgroup2 = factor(subgroup2, levels = c("Enterococcus",
"Enterobacterales")), odds_label = paste0("OR = ",
ecoc_infx_confusion_cnfs$odds_ratio[1])) %>%
filter(subgroup2 == "Enterococcus") %>%
ggplot(aes(x = fpr, y = tpr, color = subgroup2)) + geom_line(size = 1.2) +
geom_text(aes(label = auc_label, x = 0.5, y = 0.16), show.legend = F,
hjust = 0) + geom_text(aes(label = acc_label, x = 0.5,
y = 0.12), show.legend = F, hjust = 0) + geom_text(aes(label = spec_label,
x = 0.5, y = 0.08), show.legend = F, hjust = 0) + geom_text(aes(label = sens_label,
x = 0.5, y = 0.04), show.legend = F, hjust = 0) + geom_text(aes(label = paste0("Cutpoint = ",
round((optimal_cutpoint), digits = 3) * 100, "%"), x = 0.5,
y = 0.2), hjust = 0, show.legend = F) + geom_text(aes(label = odds_label,
x = 0.5, y = 0), show.legend = F, hjust = 0) + theme_bw() +
theme(axis.text = et(color = "black", size = 12), axis.title = et(color = "black",
size = 14), legend.text = et(size = 12), legend.title = et(size = 14),
legend.spacing.y = unit(0.5, "cm"), legend.position = "none",
panel.grid = eb(), strip.text = et(size = 14, color = "#2dc46b",
face = "bold"), strip.background = eb(), panel.border = eb(),
panel.spacing.y = unit(20, "mm"), axis.line.x = el(color = "black")) +
geom_vline(xintercept = -0.05) + geom_hline(yintercept = -0.05) +
geom_table_npc(data = ecoc_infx_confusion_cnfs %>%
column_to_rownames(var = "Enterococcus") %>%
select(Expansion, `No Expansion` = No.Expansion) %>%
mutate(Total = rowSums(.)), label = list(ecoc_infx_confusion_cnfs %>%
column_to_rownames(var = "Enterococcus") %>%
select(Expansion, `No Expansion` = No.Expansion) %>%
mutate(Total = rowSums(.))), npcx = 0.25, npcy = 0.5,
hjust = 0, vjust = 1, table.rownames = TRUE, table.theme = ttheme_minimal(core = list(bg_params = list(col = "#2dc46b"),
fg_params = list(col = "#2dc46b")), colhead = list(fg_params = list(col = "#2dc46b",
fontface = "bold")), rowhead = list(fg_params = list(col = "#2dc46b",
fontface = "bold")))) + scale_x_continuous(expand = expansion(add = c(0.001,
0.05))) + scale_y_continuous(expand = expansion(add = c(0.001,
0.05))) + facet_wrap(~subgroup2, ncol = 1, scales = "fixed") +
ylab("True Positive Rate\n") + xlab("\nFalse Positive Rate") +
scale_color_manual(values = c("#2dc46b")) + guides(color = guide_legend("Groups",
byrow = T, override.aes = list(size = 5)))
fig_5bebac_infx_confusion <- expan_infx_stats %>%
filter(metric %in% c("ebac_infx_ebac_below_cutpoint", "ebac_infx_ebac_above_cutpoint",
"ebac_noinfx_ebac_below_cutpoint", "ebac_noinfx_ebac_above_cutpoint")) %>%
select(metric, count) %>%
pivot_wider(names_from = metric, values_from = count)
ebac_infx_confusion_cnfs <- data.frame(Enterobacterales = c("Infection",
"No Infection"), Expansion = c(ebac_infx_confusion$ebac_infx_ebac_above_cutpoint,
ebac_infx_confusion$ebac_noinfx_ebac_above_cutpoint), `No Expansion` = c(ebac_infx_confusion$ebac_infx_ebac_below_cutpoint,
ebac_infx_confusion$ebac_noinfx_ebac_below_cutpoint))
ebac_infx_confusion_cnfs <- ebac_infx_confusion_cnfs %>%
mutate(sensitivity = round(ebac_infx_confusion_cnfs[1, 2]/(ebac_infx_confusion_cnfs[1,
2] + ebac_infx_confusion_cnfs[1, 3]), 3), specificity = round(ebac_infx_confusion_cnfs[2,
3]/(ebac_infx_confusion_cnfs[2, 3] + ebac_infx_confusion_cnfs[2,
2]), 3), odds_ratio = round((ebac_infx_confusion_cnfs[1,
2]/ebac_infx_confusion_cnfs[1, 3])/(ebac_infx_confusion_cnfs[2,
2]/ebac_infx_confusion_cnfs[2, 3]), 3))
fig_5d <- test_abundance_unnest %>%
separate(subgroup, into = c("Input", "Predict"), sep = "\n",
remove = F) %>%
filter(grepl("enterobacterales", Input) & grepl("enterobacterales",
Predict) | grepl("enterococcus", Input) & grepl("enterococcus",
Predict)) %>%
group_by(pos_class) %>%
ungroup() %>%
unnest(roc_curve) %>%
arrange(desc(AUC)) %>%
select(-boot) %>%
mutate(auc_label = paste0("AUC = ", formatC(round(AUC, 3),
digits = 2, format = "f")), auc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
", ", formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
AUC, alpha = 0.05)[3][2, ], 2)), digits = 2, format = "f"),
"]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(auc_label, " [", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][1, ], 2)), digits = 2, format = "f"),
", ", formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
AUC, alpha = 0.05)[3][2, ], 2)), digits = 2,
format = "f"), "]")), acc_label = paste0("Accuracy = ",
formatC(round(acc, 3) * 100, digits = 0, format = "f"),
"%"), acc_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0, format = "f"),
"%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0, format = "f"),
"%"), "]"), grepl("enterobacterales_rel_abundance", Input) ~
paste0(acc_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
acc, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), sens_label = paste0("Sensitivity = ",
formatC(round(sensitivity, 3) * 100, digits = 0, format = "f"),
"%"), sens_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(sens_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
sensitivity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]")), spec_label = paste0("Specificity = ",
formatC(round(specificity, 3) * 100, digits = 0, format = "f"),
"%"), spec_label = case_when(grepl("enterococcus_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[4][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"), grepl("enterobacterales_rel_abundance",
Input) ~ paste0(spec_label, " [", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][1, ], 2)) * 100, digits = 0,
format = "f"), "%"), ", ", paste0(formatC(pull(round(boot_ci(x = test_abundance[1][[1]],
specificity, alpha = 0.05)[3][2, ], 2)) * 100, digits = 0,
format = "f"), "%"), "]"))) %>%
filter(grepl(pattern = "rel_abundance", x = Input)) %>%
mutate(subgroup = str_to_title(subgroup), subgroup = gsub(pattern = "_",
replacement = " ", x = subgroup), subgroup = gsub(pattern = "rel abundance",
replacement = "Relative Abundance (%)", x = subgroup),
subgroup2 = ifelse(grepl(x = subgroup, pattern = "enterococcus",
ignore.case = TRUE), "Enterococcus", "Enterobacterales"),
subgroup2 = factor(subgroup2, levels = c("Enterococcus",
"Enterobacterales")), odds_label = paste0("OR = ",
ebac_infx_confusion_cnfs$odds_ratio[1])) %>%
filter(subgroup2 == "Enterobacterales") %>%
ggplot(aes(x = fpr, y = tpr, color = subgroup2)) + geom_line(size = 1.2) +
geom_text(aes(label = auc_label, x = 0.5, y = 0.16), show.legend = F,
hjust = 0) + geom_text(aes(label = acc_label, x = 0.5,
y = 0.12), show.legend = F, hjust = 0) + geom_text(aes(label = spec_label,
x = 0.5, y = 0.08), show.legend = F, hjust = 0) + geom_text(aes(label = sens_label,
x = 0.5, y = 0.04), show.legend = F, hjust = 0) + geom_text(aes(label = paste0("Cutpoint = ",
round((optimal_cutpoint), digits = 3) * 100, "%"), x = 0.5,
y = 0.2), hjust = 0, show.legend = F) + geom_text(aes(label = odds_label,
x = 0.5, y = 0), show.legend = F, hjust = 0) + theme_bw() +
theme(axis.text = et(color = "black", size = 12), axis.title = et(color = "black",
size = 14), legend.text = et(size = 12), legend.title = et(size = 14),
legend.spacing.y = unit(0.5, "cm"), legend.position = "none",
panel.grid = eb(), strip.text = et(size = 14, color = "red",
face = "bold"), strip.background = eb(), panel.border = eb(),
panel.spacing.y = unit(20, "mm"), axis.line.x = el(color = "black")) +
geom_vline(xintercept = -0.05) + geom_hline(yintercept = -0.05) +
geom_table_npc(data = ebac_infx_confusion_cnfs %>%
column_to_rownames(var = "Enterobacterales") %>%
select(Expansion, `No Expansion` = No.Expansion) %>%
mutate(Total = rowSums(.)), label = list(ebac_infx_confusion_cnfs %>%
column_to_rownames(var = "Enterobacterales") %>%
select(Expansion, `No Expansion` = No.Expansion) %>%
mutate(Total = rowSums(.))), npcx = 0.25, npcy = 0.5,
hjust = 0, vjust = 1, table.rownames = TRUE, table.theme = ttheme_minimal(core = list(bg_params = list(col = "red"),
fg_params = list(col = "red")), colhead = list(fg_params = list(col = "red",
fontface = "bold")), rowhead = list(fg_params = list(col = "red",
fontface = "bold")))) + scale_x_continuous(expand = expansion(add = c(0.001,
0.05))) + scale_y_continuous(expand = expansion(add = c(0.001,
0.05))) + facet_wrap(~subgroup2, ncol = 1, scales = "fixed") +
ylab("True Positive Rate\n") + xlab("\nFalse Positive Rate") +
scale_color_manual(values = c("red")) + guides(color = guide_legend("Groups",
byrow = T, override.aes = list(size = 5)))
fig_5d# Heatmap compounds and their categories
heatmap_lookup <- read.csv("./Data/qual_heatmap_lookup.csv",
stringsAsFactors = FALSE)
# Build heatmap compound list
heatmap_cmpds <- metab_qual_anon %>%
mutate(compound = str_to_title(compound), compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_lookup$compound) %>%
distinct(compound) %>%
drop_na()
qual_log2fc_ecoc_expan <- metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound), compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound) %>%
left_join(peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance)) %>%
drop_na() %>%
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
"0"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
"1"])) %>%
filter(any(mvalue != 0)) %>%
summarise(log2fc_val = log((mean(mvalue[enterococcus_expansion ==
"0"], na.rm = T)/mean(mvalue[enterococcus_expansion ==
"1"], na.rm = T)), base = 2)) # 0 = No Expansion, 1 = Expansion
qual_pval_ecoc_expan <- metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound), compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound) %>%
left_join(peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance)) %>%
drop_na() %>%
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
"0"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
"1"])) %>%
filter(any(mvalue != 0)) %>%
rstatix::wilcox_test(mvalue ~ enterococcus_expansion) %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance("p.adj")
qual_tot_ecoc_expan <- left_join(qual_log2fc_ecoc_expan, qual_pval_ecoc_expan) %>%
column_to_rownames(var = "compound")
# volcano label color
ecoc_expan_volcano_labcol <- qual_tot_ecoc_expan %>%
filter(p.adj <= 0.05 & abs(log2fc_val) >= 0.75) %>%
mutate(color = ifelse(log2fc_val < 0, "#389458", "gray47"))
# # Volcano Plot (adjusted) set.seed(456)
# volcano_adj_ecoc_expan <-
# EnhancedVolcano(qual_tot_ecoc_expan, lab =
# rownames(qual_tot_ecoc_expan), x = 'log2fc_val', y =
# 'p.adj', title = NULL, pCutoff = 0.05, FCcutoff = 0.75,
# pointSize = 6, labSize = 8, axisLabSize = 32, labCol =
# ecoc_expan_volcano_labcol$color, caption = NULL, colAlpha
# = 0.65, col = c('gray75', c('#D4CA15', '#912777',
# '#1238E3')), xlim = c(-2.5, 4), ylim = c(0, 8),
# legendPosition = 'none', legendLabels =
# c(expression(p.adj > 0.05*';' ~ Log[2] ~ FC <
# '\u00B1'*0.75), expression(p.adj > 0.05*';' ~ Log[2] ~ FC
# >= '\u00B1'*0.75), expression(p.adj <= 0.05*';' ~ Log[2]
# ~ FC < '\u00B1'*0.75), expression(p.adj <= 0.05*';' ~
# Log[2] ~ FC >= '\u00B1'*0.75)), legendLabSize = 14,
# boxedLabels = T, drawConnectors = T, widthConnectors =
# 0.2, arrowheads = F, gridlines.minor = F, gridlines.major
# = F, max.overlaps = Inf ) + theme( axis.text = et(color =
# 'black'), legend.text = et(hjust = 0), plot.margin =
# unit(c(0, 0, 0, 0), 'cm') ) + labs(subtitle = NULL) +
# annotate('segment', x = 0.8, xend = 3, y = 7.95, yend =
# 7.95, arrow = arrow(), size = 2, color = 'gray67') +
# annotate('text', x = 1.65, y = 8.15, label = 'No
# Expansion', size = 9, color = 'gray67') +
# annotate('rect', xmin = 0.75, xmax = Inf, ymin =
# -log(0.05, base = 10), ymax = Inf, alpha = .1, fill =
# 'gray87') + annotate('segment', x = -0.8, xend = -2.5, y
# = 7.95, yend = 7.95, arrow = arrow(), size = 2, color =
# '#389458') + annotate('text', x = -1.55, y = 8.15, label
# = 'Expansion', size = 9, color = '#389458') +
# annotate('rect', xmin = -0.75, xmax = -Inf, ymin =
# -log(0.05, base = 10), ymax = Inf, alpha = .1, fill =
# '#389458') + guides(color = guide_legend(nrow = 4), shape
# = guide_legend(nrow = 4)) + scale_y_continuous(expand =
# expansion(add = c(0, 0.15))) volcano_adj_ecoc_expan
# ggsave(plot = volcano_adj_ecoc_expan, filename =
# './Results/Figure_5A.pdf', width = 24, height = 11)# Using unadjusted p-values for down-selection of
# metabolites, show distribution of normalized peak area
boxplot_ecoc_expan <- metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound), compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound) %>%
left_join(peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance)) %>%
drop_na() %>%
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0), enterococcus_expansion = ifelse(enterococcus_expansion ==
1, "Expansion", "No Expansion")) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
mutate(enterococcus_expansion_0 = length(mvalue[enterococcus_expansion ==
"No Expansion"]), enterococcus_expansion_1 = length(mvalue[enterococcus_expansion ==
"Expansion"])) %>%
filter(any(mvalue != 0)) %>%
right_join(qual_tot_ecoc_expan %>%
rownames_to_column(var = "compound") %>%
mutate(abs_log2fc_val = abs(log2fc_val)) %>%
filter(p <= 0.05, abs_log2fc_val >= 1))
if (nrow(boxplot_ecoc_expan) > 0) {
print(ggpar(ggboxplot(boxplot_ecoc_expan, x = "enterococcus_expansion",
y = "mvalue", color = "enterococcus_expansion", palette = c("#389458",
"gray87"), ylab = "Normalized Peak Area", xlab = "",
outlier.shape = NA), legend.title = "Enterococcus") +
stat_compare_means(label.y.npc = 0.75) + facet_wrap(~compound,
scales = "free_y") + geom_point(data = boxplot_ecoc_expan,
aes(x = enterococcus_expansion, y = mvalue, color = enterococcus_expansion),
position = position_jitter(width = 0.2), size = 2, alpha = 0.65))
ggsave(filename = "./Results/enterococcus_expansion_boxplot.pdf",
width = 24, height = 18)
} else {
print("no significant observations")
}# Custom colors
pirate_colors <- c("#1A49BE", "#3A001E")
pirate_colors2 <- c("#3A001E", "#3A001E", "#3A001E", "#1A49BE")
t_metaphlan <- metaphlan_df %>%
filter(sampleID %in% first_samps$sampleID | grepl(sampleID,
pattern = "hd")) %>%
mutate(db = ifelse(grepl(sampleID, pattern = "lt"), "Liver Transplant",
"Healthy Donor")) %>%
select(sampleID, taxid, db, pctseqs, Total) %>%
group_by(sampleID, taxid, pctseqs) %>%
slice(1) %>%
ungroup() %>%
filter(pctseqs >= 1e-04) %>%
group_by(sampleID) %>%
dplyr::add_count(taxid, name = "totalSp") %>%
mutate(sampleID_count = length(unique(sampleID)), spPres = totalSp/sampleID_count) %>%
filter(spPres >= 0.1) %>%
select(-c(Total, sampleID_count, spPres, totalSp)) %>%
group_by(sampleID) %>%
mutate(pctseqs = pctseqs/sum(pctseqs))
t_metaphlan_mat <- t_metaphlan %>%
distinct() %>%
pivot_wider(names_from = "taxid", values_from = "pctseqs",
values_fill = 0) %>%
column_to_rownames(var = "sampleID") %>%
select(-db)
# taxUMAP microbiota table
taxumap_microbiota <- t_metaphlan_mat %>%
rownames_to_column(var = "index_column") %>%
pivot_longer(-index_column, names_to = "taxid", values_to = "pctseq") %>%
mutate(taxid = paste0("taxID", taxid)) %>%
pivot_wider(names_from = "taxid", values_from = "pctseq")
write.csv(taxumap_microbiota, "./Results/microbiota_table.csv",
row.names = FALSE)
# taxUMAP taxonomy table
taxumap_taxonomy <- t_metaphlan_mat %>%
rownames_to_column(var = "index_column") %>%
pivot_longer(-index_column, names_to = "taxid", values_to = "pctseq") %>%
left_join(tax_lookup %>%
mutate(taxid = as.character(taxid))) %>%
mutate(taxid = paste0("taxID", taxid)) %>%
distinct(Kingdom, Phylum, Class, Order, Family, Genus, Species,
taxid) %>%
transmute(OTU = taxid, Kingdom, Phylum = if_else(Phylum ==
"", "unclassified", Phylum), Class = if_else(Class ==
"", "unclassified", Class), Order = if_else(Order ==
"", "unclassified", Order), Family = if_else(Family ==
"", "unclassified", Family), Genus = if_else(Genus ==
"", "unclassified", Genus), Genus = gsub(" ", "_", Genus),
Species = if_else(Species == "", "unclassified", Species),
Species = gsub(" ", "_", Species))
write.csv(taxumap_taxonomy, "./Results/taxonomy.csv", row.names = FALSE)import sys
print(sys.path)
# USER MUST SET THEIR PATH TO THEIR LOCALLY INSTALLED TAXUMAP LOCATION## ['', '/usr/bin', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python38.zip', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/lib-dynload', '/Users/nick/Library/Python/3.8/lib/python/site-packages', '/opt/anaconda3/bin/taxumap', '/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/site-packages', '/Library/Frameworks/R.framework/Versions/4.2/Resources/library/reticulate/python']
sys.path.insert(0, '/Users/nick/Documents/taxumap/taxumap/')
from taxumap.taxumap_base import Taxumap
# From file
tu = Taxumap(taxonomy='./Results/taxonomy.csv', microbiota_data='./Results/microbiota_table.csv',random_state=456)
# Transform the data (an inplace function)## Phylum Class
## not monophyletic
## Class Order
## not monophyletic
## Order Family
## not monophyletic
## Family Genus
## not monophyletic
## post validate inputs main Kingdom ... Species
## ASV ...
## taxID817 Bacteria ... Bacteroides_fragilis
## taxID818 Bacteria ... Bacteroides_thetaiotaomicron
## taxID820 Bacteria ... Bacteroides_uniformis
## taxID821 Bacteria ... Phocaeicola_vulgatus
## taxID823 Bacteria ... Parabacteroides_distasonis
## ... ... ... ...
## taxID28447 Bacteria ... Clavibacter_michiganensis
## taxID33968 Bacteria ... Leuconostoc_pseudomesenteroides
## taxID709323 Bacteria ... Fructobacillus_tropaeoli
## taxID1070421 Bacteria ... Periweissella_fabalis
## taxID2749962 Bacteria ... Lactococcus_paracarnosus
##
## [672 rows x 7 columns]
##
## /opt/anaconda3/bin/taxumap/taxumap/dataloading.py:86: UserWarning: Reading taxonomy table. Assuming columns are ordered by phylogeny with in descending order of hierarchy:
## e.g. Kingdom, Phylum, ... , Genus, Species, etc.
## Additionally, the OTU or ASV column must be labeled as 'OTU' or 'ASV' unless otherwise specified
## warnings.warn(
tu.transform_self(neigh=55)
# Raw embedding dataframe## Taxumap(agg_levels = ['Phylum', 'Family'], weights = [1, 1])
##
## /opt/anaconda3/bin/taxumap/taxumap/taxumap_base.py:91: UserWarning: Setting min_dist to 0.05/sum(weights)
## warnings.warn("Setting min_dist to 0.05/sum(weights)")
## /opt/anaconda3/bin/taxumap/taxumap/taxumap_base.py:102: UserWarning: Setting epochs to 5000
## warnings.warn("Setting epochs to %d" % epochs)
## /opt/anaconda3/bin/taxumap/taxumap/tools.py:47: UserWarning: aggregating on Phylum
## warnings.warn("aggregating on %s" % agg_level)
## /opt/anaconda3/bin/taxumap/taxumap/tools.py:47: UserWarning: aggregating on Family
## warnings.warn("aggregating on %s" % agg_level)
embedding_df = tu.df_embedding#### Alpha Diversity ####
# Alpha diversity matrix: Inverse Simpson
alpha_invsim <- vegan::diversity(t_metaphlan_mat, index = "invsimpson") %>%
as.data.frame() %>%
rownames_to_column(var = "sampleID") %>%
dplyr::rename("InvSimpson" = ".")
# Alpha Diversity matrix: Shannon
alpha_shannon <- vegan::diversity(t_metaphlan_mat, index = "shannon") %>%
as.data.frame() %>%
rownames_to_column(var = "sampleID") %>%
dplyr::rename("Shannon" = ".")
# Alpha Diversity matrix: Observed ASVs
alpha_richness <- vegan::specnumber(t_metaphlan_mat) %>%
as.data.frame() %>%
rownames_to_column(var = "sampleID") %>%
dplyr::rename("Richness" = ".")
#### taxUMAP #####
# Find most abundant taxa per sample and plot just that
top_tax <- t_metaphlan %>%
group_by(sampleID) %>%
slice_max(pctseqs, n = 1) %>%
left_join(tax_lookup) %>%
mutate(across(everything(), ~ifelse(.=="", NA, as.character(.)))) %>%
replace_na(list(Species="unclassified",
Genus="unclassified",
Family="unclassified",
Order="unclassified",
Class="unclassified",
Phylum="unclassified")) %>%
mutate(Genus=ifelse(Genus=="unclassified",
paste(Family,Genus,sep="\n"),
as.character(Genus)),
pctseqs = as.numeric(pctseqs))
metaphlan_df2 <- t_metaphlan %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
left_join(tax_lookup) %>%
drop_na(taxid) %>%
arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
mutate(Genus = paste0(Phylum,"-",Order,"-", Family, "-",Genus)) %>%
left_join(alpha_shannon) %>%
group_by(sampleID) %>%
arrange(Genus) %>%
mutate(cum.pct = cumsum(pctseqs),
y.text = (cum.pct + c(0, cum.pct[-length(cum.pct)]))/2) %>%
ungroup() %>%
dplyr::select(-cum.pct)
metaphlan_df_sumry <-
metaphlan_df2 %>%
group_by(sampleID) %>%
dplyr::slice(1) %>%
ungroup() %>%
mutate(healthy_min_shannon = min(Shannon[db == "Healthy Donor"]),
diversity_group = case_when(
db == "Healthy Donor" ~ "Healthy Donor",
Shannon >= healthy_min_shannon ~ "High Diversity",
TRUE ~ "TBD"
),
lt_med_shannon = median(Shannon[diversity_group == "TBD"], na.rm = TRUE),
diversity_group = case_when(
diversity_group %in% c("Healthy Donor", "High Diversity") ~ diversity_group,
Shannon >= lt_med_shannon ~ "Medium Diversity",
TRUE ~ "Low Diversity"
),
diversity_group = factor(
diversity_group,
levels = c(
"Low Diversity",
"Medium Diversity",
"High Diversity",
"Healthy Donor"
)
),
diversity_group_abv = gsub(pattern = " Diversity",
replacement = "",
x = diversity_group),
diversity_group_abv = factor(diversity_group_abv,
levels = c("Low", "Medium", "High", "Healthy Donor"))) %>%
arrange(Genus)
tax_umap_mat_plot <- py$embedding_df %>%
as.data.frame() %>%
rownames_to_column(var = "sampleID") %>%
left_join(t_metaphlan %>%
distinct(sampleID, db)) %>%
left_join(top_tax) %>%
ggplot(aes(x = taxumap1, y = taxumap2, color = Genus, shape = db, size = pctseqs*100, alpha = db))+
geom_point(fill = "black")+
theme_bw() +
theme(panel.grid = eb(),
axis.title = et(color = "black", size = 14),
axis.text = et(color = "black", size = 12),
axis.line = el(color = "black"),
# legend.title = et(color = "black", size = 14),
# legend.text = et(color = "black", size = 12),
legend.position = "right"
) +
xlab("taxUMAP1") +
ylab("taxUMAP2") +
guides(
shape = guide_legend(
title = "Cohort",
override.aes = list(size = 4),
order = 1,
ncol = 1,
title.position = "top"
),
size = guide_legend(
title = "Relative Abundance (%)",
order = 2,
ncol = 1,
title.position = "top"
),
color = "none",
fill = "none",
alpha = "none"
)+
scale_color_manual(values = metaphlan_pal)+
scale_shape_manual(values = c(24, 16))+
scale_alpha_manual(values = c(1, 0.45))
tax_umap_mat_plotggsave(plot = tax_umap_mat_plot,
filename = "./Results/Figure_1B.pdf",
height = 8, width = 8)
#### Alpha Diversity Stats #####
# Obtain stats for alpha diversity
diversity_comps <- list(
c("Healthy Donor", "High"),
c("High", "Medium"),
c("High", "Low"),
c("Medium", "Low")
)
alpha_stats <- alpha_invsim %>%
left_join(alpha_shannon) %>%
left_join(alpha_richness) %>%
inner_join(metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group_abv)
) %>%
pivot_longer(!c(sampleID, db, diversity_group_abv),names_to = "diversity_metric", values_to = "value") %>%
group_by(diversity_metric) %>%
rstatix::wilcox_test(value~diversity_group_abv,
comparisons = diversity_comps,
p.adjust.method = "BH",
alternative= "two.sided"
) %>%
ungroup()
# Create dataframe for all phylogentic levels of interest
phylo_rel_abd <- t_metaphlan %>%
left_join(tax_lookup) %>%
inner_join(metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group_abv)
) %>%
mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "|")) %>%
filter(grepl(pattern = "Enterococcus|Enterobacterales|Bacteroidetes|Lachnospiraceae|Oscillospiraceae", x = Species)) %>%
mutate(organism = case_when(grepl(pattern = "Enterococcus", x = Species) ~ "Enterococcus",
grepl(pattern = "Enterobacterales", x = Species) ~ "Enterobacterales",
grepl(pattern = "Bacteroidetes", x = Species) ~ "Bacteroidetes",
grepl(pattern = "Lachnospiraceae", x = Species) ~ "Lachnospiraceae",
grepl(pattern = "Oscillospiraceae", x = Species) ~ "Oscillospiraceae")) %>%
select(sampleID, db, diversity_group_abv, organism, pctseqs) %>%
group_by(sampleID, db, diversity_group_abv, organism) %>%
summarise(pctseqs = sum(pctseqs)) %>%
ungroup() %>%
pivot_wider(names_from = organism, values_from = pctseqs, values_fill = 0) %>%
pivot_longer(!c(sampleID, db, diversity_group_abv), names_to = "organism", values_to = "pctseqs")
# Obtain stats for all phylogentic levels of interest
rel_abd_alpha_stats <- phylo_rel_abd %>%
group_by(organism) %>%
rstatix::wilcox_test(pctseqs~diversity_group_abv) %>%
bind_rows(alpha_stats) %>%
rstatix::adjust_pvalue(method = "BH") %>%
mutate(p.adj = ifelse(p.adj < 0.001, 0.001, round(p.adj, 3)))
write.csv(rel_abd_alpha_stats, "./Results/Figure_1_Statistics.csv", row.names = FALSE)
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), symbols = c("****", "***", "**", "*", "ns"))
diversity_group_colors <- c("#3A001E", "#8A0246", "#C20463", "#1A49BE")
# #### Plot Inverse Simpson ####
#
#
# set.seed(456)
# gg_alpha_invsim <- alpha_invsim %>%
# inner_join(t_metaphlan %>%
# distinct(sampleID, db) %>%
# select(sampleID, db)
# ) %>%
# inner_join(metaphlan_df %>%
# left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
# distinct(sampleID, db, diversity_group, diversity_group_abv)
# ) %>%
# mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
# ggplot(., aes(x = diversity_group_abv,
# y = InvSimpson,
# color = diversity_group_abv,
# fill = db)) +
# geom_boxplot(outlier.colour = NA,
# alpha = 0.35) +
# geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
# stat_compare_means(comparisons = diversity_comps,
# tip.length = 0.01,
# step.increase = 0.075,
# symnum.args = symnum.args,
# method.args = list(alternative = "two.sided",
# exact = FALSE)
# ) +
# theme_bw() +
# theme(
# panel.grid = eb(),
# axis.title.y = et(size = 14, color = "black"),
# axis.title.x = eb(),
# axis.text = et(size = 12, color = "black"),
# plot.margin = margin(t = 5, # Top margin
# r = 5, # Right margin
# b = 5, # Bottom margin
# l = 5), # Left margin
# panel.border = eb(),
# axis.line = el(color = 'black')
# ) +
# ylab("Alpha Diversity\n(Inverse Simpson)") +
# scale_fill_manual(values = rev(pirate_colors)) +
# scale_color_manual(values = diversity_group_colors) +
# guides(fill = guide_legend("Cohort"),
# color = guide_legend("Diversity Group",
# override.aes = aes(label = "")))+
# scale_y_continuous(breaks = seq(0,45,5),
# expand = expansion(mult = c(0.01, 0.1))) +
# coord_cartesian(xlim = c(1.1,3.9))
#
#
# gg_alpha_invsim
#
# pdf(file = "./Results/Figure_1C.pdf", height = 6, width = 7)
# gg_alpha_invsim
# invisible(dev.off())
#
# #### Plot Shannon Diversity ####
# set.seed(456)
# gg_alpha_shannon <- alpha_shannon %>%
# inner_join(t_metaphlan %>%
# distinct(sampleID, db) %>%
# select(sampleID, db)
# ) %>%
# inner_join(metaphlan_df %>%
# left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
# distinct(sampleID, db, diversity_group, diversity_group_abv)
# ) %>%
# mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
# ggplot(., aes(x = diversity_group_abv,
# y = Shannon,
# color = diversity_group_abv,
# fill = db)) +
# geom_boxplot(outlier.colour = NA,
# alpha = 0.35) +
# geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
# stat_compare_means(comparisons = diversity_comps,
# tip.length = 0.01,
# step.increase = 0.075,
# symnum.args = symnum.args,
# method.args = list(alternative = "two.sided",
# exact = FALSE)
# ) +
# theme_bw() +
# theme(
# panel.grid = eb(),
# axis.title.y = et(size = 14, color = "black"),
# axis.title.x = eb(),
# axis.text = et(size = 12, color = "black"),
# plot.margin = margin(t = 5, # Top margin
# r = 5, # Right margin
# b = 5, # Bottom margin
# l = 5), # Left margin
# panel.border = eb(),
# axis.line = el(color = 'black')
# ) +
# ylab("Alpha Diversity\n(Shannon)") +
# scale_fill_manual(values = rev(pirate_colors)) +
# scale_color_manual(values = diversity_group_colors) +
# guides(fill = guide_legend("Cohort"),
# color = guide_legend("Diversity Group",
# override.aes = aes(label = "")))+
# scale_y_continuous(breaks = seq(0,6.5,1),
# expand = expansion(mult = c(0.01, 0.1))) +
# coord_cartesian(xlim = c(1.1,3.9))
#
#
# gg_alpha_shannon
#
# pdf(file = "./Results/Figure_1D.pdf", height = 6, width = 7)
# gg_alpha_shannon
# invisible(dev.off())
#
# #### Plot Species Richness ####
# set.seed(456)
# gg_alpha_richness <- alpha_richness %>%
# inner_join(t_metaphlan %>%
# distinct(sampleID, db) %>%
# select(sampleID, db)
# ) %>%
# inner_join(metaphlan_df %>%
# left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group, diversity_group_abv)) %>%
# distinct(sampleID, db, diversity_group, diversity_group_abv)
# ) %>%
# mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
# ggplot(., aes(x = diversity_group_abv,
# y = Richness,
# color = diversity_group_abv,
# fill = db)) +
# geom_boxplot(outlier.colour = NA,
# alpha = 0.35) +
# geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
# stat_compare_means(comparisons = diversity_comps,
# tip.length = 0.01,
# label.y = c(145, 160, 150, 110),
# symnum.args = symnum.args,
# method.args = list(alternative = "two.sided",
# exact = FALSE)
# ) +
# theme_bw() +
# theme(
# panel.grid = eb(),
# axis.title.y = et(size = 14, color = "black"),
# axis.title.x = eb(),
# axis.text = et(size = 12, color = "black"),
# plot.margin = margin(t = 5, # Top margin
# r = 5, # Right margin
# b = 5, # Bottom margin
# l = 5), # Left margin
# panel.border = eb(),
# axis.line = el(color = 'black')
# ) +
# ylab("Alpha Diversity\n(Richness)") +
# scale_fill_manual(values = rev(pirate_colors)) +
# scale_color_manual(values = diversity_group_colors) +
# guides(fill = guide_legend("Cohort"),
# color = guide_legend("Diversity Group",
# override.aes = aes(label = "")))+
# scale_y_continuous(breaks = seq(0,180,50),
# expand = expansion(mult = c(0.01, 0.035))) +
# coord_cartesian(xlim = c(1.1,3.9))
#
#
# gg_alpha_richness
#
# pdf(file = "./Results/Figure_1E.pdf", height = 6, width = 7)
# gg_alpha_richness
# invisible(dev.off())
#### Plot Lachnospiraceae ####
set.seed(456)
gg_lach_rel_abd <- phylo_rel_abd %>%
filter(organism == "Lachnospiraceae") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.65, 0.71, 0.76, 0.82)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Lachnospiraceae")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
limits = c(-0.01,1.1),
expand = expansion(mult = c(0.01, 0.1)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))
# gg_lach_rel_abd
# pdf(file = "./Results/Figure_1F.pdf", height = 6, width = 7)
# gg_lach_rel_abd
# invisible(dev.off())
#### Plot Bacteroidetes ####
set.seed(456)
gg_bact_rel_abd <- phylo_rel_abd %>%
filter(organism == "Bacteroidetes") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.65, 0.98, 0.92, 1.02)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Bacteroidetes")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
limits = c(-0.01,1.1),
expand = expansion(mult = c(0.01, 0.1)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))
# gg_bact_rel_abd
# pdf(file = "./Results/Figure_1I.pdf", height = 6, width = 7)
# gg_bact_rel_abd
# invisible(dev.off())
#### Plot Oscillospiraceae ####
set.seed(456)
gg_oscl_rel_abd <- phylo_rel_abd %>%
filter(organism == "Oscillospiraceae") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.33, 0.52, 0.59, 0.66)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Oscillospiraceae")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
limits = c(-0.01,1.1),
expand = expansion(mult = c(0.01, 0.1)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))
# gg_oscl_rel_abd
# pdf(file = "./Results/Figure_1J.pdf", height = 6, width = 7)
# gg_oscl_rel_abd
# invisible(dev.off())
#### Plot Enterococcus ####
set.seed(456)
gg_ecoc_rel_abd <- phylo_rel_abd %>%
filter(organism == "Enterococcus") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.20, 0.75, 0.985, 1.05)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Enterococcus")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
limits = c(-0.01,1.1),
expand = expansion(mult = c(0.01, 0.1)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))
# gg_ecoc_rel_abd
# pdf(file = "./Results/Figure_1G.pdf", height = 6, width = 7)
# gg_ecoc_rel_abd
# invisible(dev.off())
#### Plot Enterobacterales #####
set.seed(456)
gg_ebac_rel_abd <- phylo_rel_abd %>%
filter(organism == "Enterobacterales") %>%
group_by(organism) %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor"))) %>%
ggplot(., aes(x = diversity_group_abv,
y = pctseqs,
color = diversity_group_abv,
fill = db)) +
geom_boxplot(outlier.colour = NA,
alpha = 0.35) +
geom_jitter(width = 0.2, size = 2.5, alpha = 0.65)+
stat_compare_means(comparisons = diversity_comps,
tip.length = 0.01,
symnum.args = symnum.args,
method.args = list(alternative = "two.sided",
exact = FALSE),
label.y = c(0.20, 0.75, 0.985, 1.05)
) +
theme_bw() +
theme(
panel.grid = eb(),
axis.title.y = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.text = et(size = 12, color = "black"),
plot.margin = margin(t = 5, # Top margin
r = 5, # Right margin
b = 5, # Bottom margin
l = 5), # Left margin
panel.border = eb(),
axis.line = el(color = 'black')
) +
ylab(~atop(paste(italic("Enterobacterales")), paste("MetaPhlAn4 Relative Abundance"))) +
scale_fill_manual(values = rev(pirate_colors)) +
scale_color_manual(values = diversity_group_colors) +
guides(fill = guide_legend("Cohort"),
color = guide_legend("Diversity Group",
override.aes = aes(label = "")))+
scale_y_continuous(breaks = seq(0,1,0.1),
limits = c(-0.01,1.1),
expand = expansion(mult = c(0.01, 0.1)),
labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(xlim = c(1.1,3.9))
# gg_ebac_rel_abd
# pdf(file = "./Results/Figure_1H.pdf", height = 6, width = 7)
# gg_ebac_rel_abd
# invisible(dev.off())
#### Plot Metaphlan Relative Abundance ####
# Figure 1A-MetaPhlAn4 Taxonomy
# Load legend
tax_legend <- magick::image_read_pdf("./Data/legend.v2.pdf")
gg_tax_legend <- cowplot::ggdraw() + cowplot::draw_image(tax_legend)
metaphlan_pal2 <- getRdpPal(metaphlan_df2)
gg_metaphlan <- metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group)) %>%
ungroup() %>%
mutate(Genus = factor(Genus, levels = unique(Genus))) %>%
group_by(sampleID) %>%
arrange(Genus) %>%
ggplot(aes(x=reorder(sampleID, Shannon),y=pctseqs)) +
geom_bar(stat="identity",aes(fill=Genus), width = 0.9) +
scale_fill_manual(values = metaphlan_pal2) +
theme_bw() +
theme(legend.position = "none",
axis.text.x=eb(),
axis.ticks.x=eb(),
strip.text.x= et(angle=0,size=14),
strip.background = eb(),
axis.title.y = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
panel.spacing = unit(0.5, "lines"),
plot.margin = margin(t = 5,
r = 5,
b = 0,
l = 5)) +
facet_grid(. ~diversity_group, scales = "free", space = "free")+
scale_y_continuous(expand = expansion(mult = c(0.005,0.005)),
labels = scales::percent_format(accuracy = 1)) +
ylab("MetaPhlAn4 Relative Abundance") +
xlab("")
# Color facets
gg_metaphlan_grob <- ggplot_gtable(ggplot_build(gg_metaphlan))
strip_both <- which(grepl('strip-', gg_metaphlan_grob$layout$name))
fills <- diversity_group_colors
k <- 1
for (i in strip_both) {
l <- which(grepl('titleGrob', gg_metaphlan_grob$grobs[[i]]$grobs[[1]]$childrenOrder))
gg_metaphlan_grob$grobs[[i]]$grobs[[1]]$children[[l]]$children[[1]]$gp$col <- fills[k]
k <- k+1
}
gg_shannon <- metaphlan_df_sumry %>%
ggplot(aes(x=reorder(sampleID, Shannon), y = Shannon)) +
geom_bar(stat="identity",aes(fill=diversity_group_abv), width = 0.9) +
theme_bw() +
theme(legend.position = "none",
axis.text.x = eb(),
axis.title.x = eb(),
axis.ticks.x = eb(),
strip.text = eb(),
strip.background = er(fill = "white"),
axis.title.y = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
panel.spacing = unit(0.5, "lines"),
plot.margin = margin(t = 0,
r = 5,
b = 0,
l = 5),
panel.grid = eb()) +
scale_fill_manual(values = diversity_group_colors) +
facet_grid(. ~diversity_group, scales = "free", space = "free")
gg_metaphlan_shannon <-
plot_grid(
gg_metaphlan_grob,
gg_shannon,
axis = "lb",
align = "hv",
nrow = 2,
rel_heights = c(1, 0.15)
)
pdf(file = "./Results/Figure_1A.pdf", width = 12.25, height = 8)
gg_metaphlan_shannon
invisible(dev.off())
#### Combine all into a single figure 1 Start ####
alpha_org_plot <- plot_grid(
# gg_alpha_invsim + theme(legend.position = "none",
# axis.text.x = eb(),
# axis.title.x = eb(),
# plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
# gg_alpha_shannon + theme(legend.position = "none",
# axis.text.x = eb(),
# axis.title.x = eb(),
# plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
# gg_alpha_richness+ theme(legend.position = "none",
# axis.text.x = eb(),
# axis.title.x = eb(),
# plot.margin = unit(c(0.1, 0, 0.15, 0), "cm")),
gg_lach_rel_abd + theme(legend.position = "none",
axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
axis.title.x = eb(),
plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
gg_bact_rel_abd + theme(legend.position = "none",
axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
gg_oscl_rel_abd + theme(axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
plot.margin = unit(c(0.05, 0, 0, 0), "cm"),
legend.position = "right"),
gg_ecoc_rel_abd + theme(legend.position = "none",
axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
gg_ebac_rel_abd + theme(legend.position = "none",
axis.text.x = et(angle = 45, hjust = 0.85, vjust = 0.85),
plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
tax_umap_mat_plot +
theme(plot.margin = unit(c(0.05, 0, 0, 0), "cm")),
nrow = 2,
axis = "lb",
align = "v",
rel_widths = c(1, 1, 1.3),
rel_heights = c(1,1)
)
# This is useful to figure out the general layout of the plots
gs <- lapply(1:3, function(ii)
grobTree(rectGrob(gp=gpar(fill=ii, alpha=0.5)), textGrob(ii)))
# grid.arrange(grobs=gs, ncol=4,
# top="top label", bottom="bottom\nlabel",
# left="left label", right="right label")
lay <- rbind(c(1,1,1,1,1,NA),
c(1,1,1,1,1,2),
c(1,1,1,1,1,NA),
c(3,3,3,3,3,3),
c(3,3,3,3,3,3),
c(3,3,3,3,3,3),
c(3,3,3,3,3,3))
# grid.arrange(grobs = gs, layout_matrix = lay)
pdf(file = "./Results/Figure_1.pdf", height = 16, width = 20)
grid.arrange(
gg_metaphlan_shannon, # 1
gg_tax_legend, # 2
# tax_umap_mat_plot +
# theme(
# legend.text = et(size = 10),
# legend.title = et(size = 12),
# plot.margin = margin(
# t = 5, # Top margin
# r = 0, # Right margin
# b = 75, # Bottom margin
# l = 5 # Left margin
# )
# ), # 2
alpha_org_plot, # 3
layout_matrix = lay
)
invisible(dev.off())
grid.arrange(
gg_metaphlan_shannon, # 1
gg_tax_legend, # 2
# tax_umap_mat_plot +
# theme(
# legend.text = et(size = 10),
# legend.title = et(size = 12),
# plot.margin = margin(
# t = 5, # Top margin
# r = 0, # Right margin
# b = 75, # Bottom margin
# l = 5 # Left margin
# )
# ), # 2
alpha_org_plot, # 3
layout_matrix = lay
)#### Combine all into a single figure 1 End ####gg_metaphlan_pathos <- metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group)) %>%
mutate(Species = paste(Kingdom, Phylum, Class, Order, Family, Genus, Species, sep = "-")) %>%
ungroup() %>%
filter(diversity_group != "Healthy Donor") %>%
mutate(Species = case_when(grepl(x = Species, pattern = "Enterococcus") ~ "Enterococcus",
grepl(x = Species, pattern = "Enterobacterales") ~ "Enterobacterales",
TRUE ~ "Other")) %>%
group_by(sampleID, diversity_group, Shannon, Species) %>%
summarise(pctseqs = sum(pctseqs)) %>%
mutate(pctseqs = ifelse(Species == "Other", 0, pctseqs)) %>%
ungroup() %>%
arrange(Species) %>%
mutate(Species = forcats::fct_relevel(Species)) %>%
group_by(sampleID) %>%
arrange(Species) %>% #distinct(sampleID)
ggplot(aes(x=reorder(sampleID, Shannon),y=pctseqs)) +
geom_bar(stat="identity",aes(fill=Species), width = 0.9) +
scale_fill_manual("Pathobiont",
values = c("#FF0000", "#0C7A3A", "#00000000"),
breaks = c("Enterobacterales", "Enterococcus", "")) +
# paletteer::scale_fill_paletteer_d(palette = "vapeplot::vaporwave"
# # "rcartocolor::Antique"
# # "MetBrewer::Renoir"
# ) +
theme_bw() +
theme(legend.position = "right",
axis.text.x=eb(),
axis.ticks.x=eb(),
strip.text.x= et(angle=0,size=14),
strip.background = eb(),
axis.title.y = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
panel.spacing = unit(0.5, "lines"),
panel.grid.minor = eb(),
panel.grid.major.y = eb()) +
facet_grid(. ~diversity_group, scales = "free_x")+
scale_y_continuous(expand = expansion(mult = c(0.005,0.005)),
labels = scales::percent_format(accuracy = 1)) +
ylab("MetaPhlAn4 Relative Abundance") +
xlab("")
# gg_metaphlan_pathos
#### Bile Acid Genes ####
ba_genes_wilcox_test <- ba_genes %>%
filter(grepl(patientID, pattern = "^lt")) %>%
mutate(gene = factor(gene, levels = c("BaiA", "BaiA1", "BaiA2",
"BaiB", "BaiCD", "BaiE",
"BaiF", "BaiG", "BaiH",
"BaiI", "BSH",
"3aHSDH", "3bHSDH",
"7aHSDH", "7bHSDH",
"12abHSDH",
"5AR", "5BR"))) %>%
filter(sampleID %in% metaphlan_df2$sampleID) %>%
left_join(metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)) %>%
group_by(gene) %>%
wilcox_test(tpm ~ diversity_group)
ba_genes_wilcox_effect <- ba_genes %>%
mutate(gene = factor(gene, levels = c("BaiA", "BaiA1", "BaiA2",
"BaiB", "BaiCD", "BaiE",
"BaiF", "BaiG", "BaiH",
"BaiI", "BSH",
"3aHSDH", "3bHSDH",
"7aHSDH", "7bHSDH",
"12abHSDH",
"5AR", "5BR"))) %>%
filter(sampleID %in% metaphlan_df2$sampleID) %>%
left_join(metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)) %>%
group_by(gene) %>%
wilcox_effsize(tpm ~ diversity_group)
gg_ba_genes <- ba_genes %>%
filter(grepl(patientID, pattern = "^lt")) %>%
mutate(gene = factor(gene, levels = c("BaiA", "BaiA1", "BaiA2",
"BaiB", "BaiCD", "BaiE",
"BaiF", "BaiG", "BaiH",
"BaiI", "BSH",
"3aHSDH", "3bHSDH",
"7aHSDH", "7bHSDH",
"12abHSDH",
"5AR", "5BR"))) %>%
filter(sampleID %in% metaphlan_df2$sampleID) %>%
left_join(metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)) %>%
ungroup() %>%
ggplot(aes(x = reorder(sampleID, Shannon), y = tpm, fill = diversity_group)) +
geom_col() +
theme_bw() +
theme(legend.position = "none",
axis.text.x = eb(),
axis.ticks.x = eb(),
strip.text.x = eb(),
strip.text.y = et(angle = 0, size = 14, hjust = 0),
strip.background = eb(),
axis.title.y = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
panel.spacing = unit(0.5, "lines"),
plot.margin = margin(t = 5,
r = 5,
b = 0,
l = 5)) +
scale_fill_manual(values = diversity_group_colors) +
facet_grid(gene~diversity_group, scales = "free_x")+
scale_y_continuous(expand = expansion(mult = c(0.005,0.005))) +
ylab("Gene Abundance (TPM)") +
xlab("")
# gg_ba_genes
pdf(file = "./Results/Metaphlan_Bile_Acid.pdf", height = 26, width = 20, onefile = FALSE)
gg.stack(gg_metaphlan,
gg_ba_genes,
heights = c(1, 8))
dev.off()## quartz_off_screen
## 2
# # Save gg_metaphlan as plotly html
# htmlwidgets::saveWidget(plotly::ggplotly(metaphlan_df2 %>%
# left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group)) %>%
# ungroup() %>%
# mutate(Genus = paste(Genus, Species, sep = "-"),
# Genus = factor(Genus, levels = unique(Genus))) %>%
# group_by(sampleID) %>%
# arrange(Genus) %>%
# ggplot(aes(x=reorder(sampleID, Shannon),y=pctseqs)) +
# geom_bar(stat="identity",aes(fill=Genus), width = 0.9) +
# scale_fill_continuous(type = "viridis") +
# theme_bw() +
# theme(legend.position = "none",
# axis.text.x=eb(),
# axis.ticks.x=eb(),
# strip.text.x= et(angle=0,size=14),
# strip.background = eb(),
# axis.title.y = et(color = "black", size = 14),
# axis.text.y = et(color = "black", size = 12),
# panel.spacing = unit(0.5, "lines"),
# plot.margin = margin(t = 5,
# r = 5,
# b = 0,
# l = 5)) +
# facet_grid(. ~diversity_group, scales = "free", space = "free")+
# scale_y_continuous(expand = expansion(mult = c(0.005,0.005)),
# labels = scales::percent_format(accuracy = 1)) +
# ylab("MetaPhlAn4 Relative Abundance") +
# xlab("")), "./Results/Metaphlan_Species.html")
# Stats
gg_bile_gene_stats <-
ggstatsplot::grouped_ggbetweenstats(
data = ba_genes %>%
filter(grepl(patientID, pattern = "^lt")) %>%
mutate(gene = factor(
gene,
levels = c(
"BaiA",
"BaiA1",
"BaiA2",
"BaiB",
"BaiCD",
"BaiE",
"BaiF",
"BaiG",
"BaiH",
"BaiI",
"BSH",
"3aHSDH",
"3bHSDH",
"7aHSDH",
"7bHSDH",
"12abHSDH",
"5AR",
"5BR"
)
)) %>%
filter(sampleID %in% metaphlan_df2$sampleID) %>%
left_join(metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)),
x = diversity_group,
y = tpm,
grouping.var = gene,
ggsignif.args = list(textsize = 4, tip_length = 0.01),
p.adjust.method = "BH",
type = "non-parametric",
# pairwise.comparisons = FALSE,
pairwise.display = "significant",
results.subtitle = FALSE,
ggplot.component = list(scale_fill_manual(values = diversity_group_colors),
scale_color_manual(values = diversity_group_colors)),
plotgrid.args = list(nrow = 3),
annotation.args = list(title = "Bile Acid Gene")
) #+
# geom_signif(comparisons = list(c("Medium Diversity", "High Diversity"),
# c("Medium Diversity", "Low Diversity")),
# test = "wilcox.test")
pdf(file = "./Results/Bile_Acid_Stats.pdf", height = 20, width = 40, onefile = FALSE)
gg_bile_gene_stats
dev.off()## quartz_off_screen
## 2
# Bile Genes Heatmap
gg_ba_genes_heatmap <-
ba_genes %>%
filter(grepl(patientID, pattern = "^lt")) %>%
mutate(gene = factor(gene, levels = c("BaiA", "BaiA1", "BaiA2",
"BaiB", "BaiCD", "BaiE",
"BaiF", "BaiG", "BaiH",
"BaiI", "BSH",
"3aHSDH", "3bHSDH",
"7aHSDH", "7bHSDH",
"12abHSDH",
"5AR", "5BR")),
gene = forcats::fct_rev(gene)) %>%
filter(sampleID %in% metaphlan_df2$sampleID) %>%
left_join(metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)) %>%
group_by(gene) %>%
mutate(tpm_zscore = scale(tpm, scale = TRUE, center = TRUE),
tpm_log10 = log(tpm, base = 10),
tpm_log10 = ifelse(is.infinite(tpm_log10), 0, tpm_log10)) %>%
ggplot(aes(x = reorder(sampleID, Shannon), y = gene, fill = tpm_log10)) +
geom_tile(stat = "identity", color = "gray") +
theme_bw() +
theme(
panel.grid = eb(),
plot.background = eb(),
panel.border = er(colour = "black",
fill = NA,
linewidth = 0.5),
axis.title = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
axis.text.x = eb(),
axis.ticks.x = eb(),
strip.text.x = eb(),
strip.text.y = et(angle = 0, size = 14, hjust = 0),
strip.background = eb(),
axis.line = el(color = "black")
) +
facet_grid(. ~diversity_group, scales = "free_x")+
ylab("Gene\n") +
xlab("") +
guides(fill = guide_legend(title = "TPM",
reverse = TRUE)) +
scale_fill_gradient2(low = "white",
mid= "cyan1",
high = "#0c0970",
midpoint = 3.5,
breaks = seq(1, 7, by = 1),
# labels = c("1", "2", "3", "4", "5", "6", "7"),
labels = scales::label_math(),
limits = c(0, 7)
) +
# scale_fill_fermenter(palette = "YlGnBu", direction = 1) +
# scale_x_discrete(expand=c(0,0))+
scale_y_discrete(expand=c(0,0))
gg_ba_genes_bars <-
ba_genes %>%
filter(grepl(patientID, pattern = "^lt")) %>%
mutate(gene = factor(gene, levels = c("BaiA", "BaiA1", "BaiA2",
"BaiB", "BaiCD", "BaiE",
"BaiF", "BaiG", "BaiH",
"BaiI", "BSH",
"3aHSDH", "3bHSDH",
"7aHSDH", "7bHSDH",
"12abHSDH",
"5AR", "5BR"))) %>%
filter(sampleID %in% metaphlan_df2$sampleID) %>%
left_join(metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)) %>%
mutate(pres_abs = ifelse(tpm > 0, 1, 0)) %>%
group_by(sampleID, Shannon, diversity_group) %>%
summarise(pres_abs = sum(pres_abs)) %>%
# summarise(tot_pres_abs = sum(pres_abs)) %>%
# mutate(pct_pres = tot_pres_abs / length(unique(ba_genes$gene))) %>%
ggplot(aes(x = reorder(sampleID, Shannon), y = pres_abs, fill = diversity_group)) +
geom_col() +
geom_hline(yintercept = 18, linetype = "dashed") +
# annotate(geom = "text", x = 10, y = 18.5, label = "18 Genes Total") +
theme_bw() +
theme(
panel.grid = eb(),
plot.background = eb(),
panel.border = er(colour = "black",
fill = NA,
linewidth = 0.5),
axis.title = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
axis.text.x = eb(),
axis.ticks.x = eb(),
strip.text.x = eb(),
strip.text.y = et(angle = 0, size = 14, hjust = 0),
strip.background = eb(),
axis.line = el(color = "black"),
legend.position = "none"
) +
facet_grid(. ~diversity_group, scales = "free_x")+
scale_fill_manual(values = diversity_group_colors) +
ylab("Genes Present (Total) \n") +
xlab("") +
guides(fill = guide_legend(title = "Diversity Group",
reverse = TRUE)) +
# scale_x_discrete(expand=c(0,0)) +
scale_y_continuous(expand = c(0,0),
breaks = c(seq(0,15,5), 18),
limits = c(0,20),
labels = c("0", "5", "10", "15", "18 Total Genes"))
ba_genes_stats <- ba_genes %>%
filter(grepl(patientID, pattern = "^lt")) %>%
mutate(gene = factor(gene, levels = c("BaiA", "BaiA1", "BaiA2",
"BaiB", "BaiCD", "BaiE",
"BaiF", "BaiG", "BaiH",
"BaiI", "BSH",
"3aHSDH", "3bHSDH",
"7aHSDH", "7bHSDH",
"12abHSDH",
"5AR", "5BR"))) %>%
filter(sampleID %in% metaphlan_df2$sampleID) %>%
left_join(metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)) %>%
mutate(pres_abs = ifelse(tpm > 0, 1, 0)) %>%
group_by(sampleID, Shannon, diversity_group) %>%
summarise(tot_pres_abs = sum(pres_abs)) %>%
mutate(pct_pres = tot_pres_abs / length(unique(ba_genes$gene))) %>%
ungroup() %>%
kruskal_test(pct_pres ~ diversity_group)
# wilcox_test(pct_pres ~ diversity_group, comparisons = list(
# c("High Diversity", "Medium Diversity"),
# c("High Diversity", "Low Diversity"),
# c("Medium Diversity", "Low Diversity")
# ))
genes_stats_anno <- data.frame(diversity_group = c("Low Diversity",
"Medium Diversity",
"High Diversity"),
pct_pres = c(0.9, NA, NA),
label = c(paste0("Kruskal-Wallis, p = ", scales::scientific(ba_genes_stats$p)),
NA,
NA)) %>%
mutate(diversity_group = factor(diversity_group, levels = c("Low Diversity",
"Medium Diversity",
"High Diversity")))
gg_ba_genes_box <-
ba_genes %>%
filter(grepl(patientID, pattern = "^lt")) %>%
mutate(gene = factor(gene, levels = c("BaiA", "BaiA1", "BaiA2",
"BaiB", "BaiCD", "BaiE",
"BaiF", "BaiG", "BaiH",
"BaiI", "BSH",
"3aHSDH", "3bHSDH",
"7aHSDH", "7bHSDH",
"12abHSDH",
"5AR", "5BR"))) %>%
filter(sampleID %in% metaphlan_df2$sampleID) %>%
left_join(metaphlan_df_sumry %>% select(sampleID, Shannon, diversity_group)) %>%
mutate(pres_abs = ifelse(tpm > 0, 1, 0)) %>%
group_by(sampleID, Shannon, diversity_group) %>%
summarise(tot_pres_abs = sum(pres_abs)) %>%
mutate(pct_pres = tot_pres_abs / length(unique(ba_genes$gene))) %>%
ggplot(aes(x = diversity_group, y = pct_pres, fill = diversity_group)) +
geom_violin(trim = TRUE, alpha = 0.85) +
geom_boxplot(alpha = 0.5, fill = "white", width = 0.4) +
geom_jitter(color = "black", fill = "white", alpha = 0.5, shape = 21, size = 1.5, position = position_jitter(width = 0.15, seed = 123)) +
geom_text(data = genes_stats_anno, label = genes_stats_anno$label, nudge_x = -0.35) +
theme_bw() +
theme(
panel.grid = eb(),
plot.background = eb(),
panel.border = er(colour = "black",
fill = NA,
linewidth = 0.5),
axis.title = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
axis.text.x = eb(),
axis.ticks.x = eb(),
strip.text.x = eb(),
strip.text.y = et(angle = 0, size = 14, hjust = 0),
strip.background = eb(),
axis.line = el(color = "black"),
legend.position = "right",
) +
facet_grid(. ~diversity_group, scales = "free", space = "free")+
scale_fill_manual(values = diversity_group_colors) +
ylab("Genes Present (%) \n") +
xlab("") +
guides(fill = guide_legend(title = "Diversity Group"),
color = guide_legend(title = "Diversity Group")) +
scale_x_discrete(expand = expansion(mult = c(2, 2))) +
scale_y_continuous(breaks = seq(0, 1, 0.25),
expand = expansion(mult = c(0.01, 0.03)),
labels = c("0%", "25%", "50%", "75%", "100%"))
# pdf(file = "./Results/Figure_5.pdf", height = 14, width = 20, onefile = FALSE)
# gg.stack(gg_metaphlan_family,
# # gg_metaphlan,
# gg_ba_genes_heatmap,
# gg_ba_genes_bars,
# gg_ba_genes_box,
# heights = c(0.2, 0.5, 0.2, 0.2),
# gap = 2)
# dev.off()
pdf(file = "./Results/Figure_4.pdf", height = 10, width = 16, onefile = FALSE)
(
(gg_metaphlan_pathos + theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
patchwork:: plot_spacer() /
(gg_ba_genes_heatmap + theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
patchwork::plot_spacer() /
(gg_ba_genes_bars + theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
patchwork::plot_spacer() /
(gg_ba_genes_box + theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt")))
) +
patchwork::plot_layout(guides = 'collect',
heights = c(0.15,
-0.03, # Spacer
0.6,
-0.03, # Spacer
0.3,
-0.03, # Spacer
0.3))
invisible(dev.off())
{
((gg_metaphlan_pathos + theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
patchwork:: plot_spacer() /
(gg_ba_genes_heatmap + theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
patchwork::plot_spacer() /
(gg_ba_genes_bars + theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt"))) /
patchwork::plot_spacer() /
(gg_ba_genes_box + theme(plot.margin = margin(t = 0, r = 5, b = -5, l = 5, "pt")))
) +
patchwork::plot_layout(guides = 'collect',
heights = c(0.15,
-0.03, # Spacer
0.6,
-0.03, # Spacer
0.3,
-0.03, # Spacer
0.3))
}heatmap_data <-
t_metaphlan %>%
drop_na(taxid) %>%
select(sampleID) %>%
group_by(sampleID) %>%
dplyr::slice(1) %>%
mutate(db = ifelse(grepl(sampleID, pattern = "lt"), "Liver Transplant", "Healthy Donor")) %>%
left_join(metab_qual_anon) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound),
compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
group_by(compound) %>%
mutate(median_val = ifelse(median(mvalue, na.rm = TRUE) == 0, min(mvalue[mvalue > 0], na.rm = TRUE)/10, median(mvalue, na.rm = TRUE)),
heatmap_val = ifelse(log(mvalue/ median_val, base = 2) == -Inf, 0, log(mvalue/ median_val, base = 2))) %>%
ungroup() %>%
select(-c(mvalue, median_val)) %>%
group_by(sampleID, compound) %>%
slice_max(heatmap_val, with_ties = F, n = 1) %>%
ungroup() %>%
select(-db) %>%
left_join(metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db, diversity_group_abv)) %>%
distinct(sampleID, db, diversity_group_abv), by = "sampleID") %>%
group_by(sampleID, compound) %>%
slice_max(heatmap_val, with_ties = F, n = 1) %>%
left_join(alpha_shannon) %>%
group_by(db) %>%
arrange(db, Shannon) %>%
ungroup()
# Stats
heatmap_pvals <-
heatmap_data %>%
filter(diversity_group_abv != "Healthy Donor") %>%
drop_na(compound) %>%
group_by(compound) %>%
rstatix::kruskal_test(heatmap_val~diversity_group_abv) %>%
rstatix::adjust_pvalue(method = "BH") %>%
select(compound, statistic, p, p.adj)
heatmap_labels <-
heatmap_data %>%
mutate(compound = str_to_title(compound)) %>%
left_join(heatmap_lookup) %>%
left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
arrange(class, subclass, p) %>%
arrange(class, subclass) %>%
pivot_wider(c(diversity_group_abv, sampleID, db, Shannon), names_from = "compound", values_from = "heatmap_val", values_fn = mean) %>%
group_by(db) %>%
arrange(db, Shannon) %>%
ungroup() %>%
distinct(sampleID, .keep_all = TRUE) %>%
select(diversity_group_abv) %>%
mutate(
diversity_group_abv = as.character(diversity_group_abv),
diversity_group_abv = ifelse(
grepl(pattern = "Healthy", as.character(diversity_group_abv)),
diversity_group_abv,
paste(diversity_group_abv, "Diversity")
),
diversity_group_abv = factor(diversity_group_abv, levels = c("Low Diversity", "Medium Diversity",
"High Diversity", "Healthy Donor"))
)
# Build heatmap compound order (row order) and compound class (row slice)
heatmap_order <-
heatmap_data %>%
ungroup() %>%
filter(compound %in% heatmap_cmpds$compound) %>%
distinct(compound) %>%
drop_na() %>%
left_join(heatmap_lookup) %>%
mutate(class = case_when(class %in% c("Dicarboxylic Acid") ~ "Fatty Acid",
class %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
TRUE ~ class),
subclass = case_when(subclass %in% c("Branched-Chain Fatty Acid",
"Aminated Fatty Acid",
"Long-Chain Fatty Acid",
"Dicarboxylic Acid") ~ "Other Fatty Acid",
subclass == "Indole" ~ "Tryptophan Metabs.",
subclass %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
TRUE ~ subclass
)) %>%
left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
ungroup() %>%
mutate(class = factor(
class,
levels = c(
"Fatty Acid", # 1
# "Dicarboxylic Acid",
"Amino Acid", # 2
"Bile Acid", # 3
"Indole", # 4
"Add. Cmpds.", # 5
# "Phenolic Aromatic",
# "Kynurine Pathway",
"Vitamin" # 6
)
),
subclass = factor(
subclass,
levels = c(
"Short-Chain Fatty Acid", # 1
"Other Fatty Acid", # 2
"Amino Acid", # 3
# "Aminated Fatty Acid",
# "Long-Chain Fatty Acid",
# "Dicarboxylic Acid",
"Primary Bile Acid", # 4
"Secondary Bile Acid", # 5
"Conjugated Bile Acid", # 6
"Tryptophan Metabs.", # 7
# "Phenolic Aromatic", #
# "Kynurine Pathway", #
"Vitamin", # 8
"Add. Cmpds." # 9
)
)) %>%
arrange(class, subclass, p, compound) %>%
select(class, subclass, p, compound)
# Build heatmap patient order following same order as gg_metaphlan plot
heatmap_column_order <- heatmap_data %>%
group_by(patientID) %>%
dplyr::slice(1) %>%
left_join(alpha_shannon) %>%
group_by(db) %>%
arrange(db, Shannon) %>%
select(patientID) %>%
distinct(patientID) %>%
pull(patientID)
# P-value legend color
pvalue_col_fun = colorRamp2(c(0, 0.045), c("#75C236", "#E3E4E6"))
# Create heatmap for adjusted p-values
pvalue_adj <-
heatmap_pvals %>%
group_by(compound, p.adj, p) %>%
dplyr::slice(1) %>%
ungroup() %>%
left_join(heatmap_lookup) %>%
mutate(class = case_when(class %in% c("Dicarboxylic Acid") ~ "Fatty Acid",
class %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
TRUE ~ class),
subclass = case_when(subclass %in% c("Branched-Chain Fatty Acid",
"Aminated Fatty Acid",
"Long-Chain Fatty Acid",
"Dicarboxylic Acid") ~ "Other Fatty Acid",
subclass == "Indole" ~ "Tryptophan Metabs.",
subclass %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
TRUE ~ subclass
)) %>%
left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
ungroup() %>%
mutate(class = factor(
class,
levels = c(
"Fatty Acid", # 1
# "Dicarboxylic Acid",
"Amino Acid", # 2
"Bile Acid", # 3
"Indole", # 4
"Add. Cmpds.", # 5
# "Phenolic Aromatic",
# "Kynurine Pathway",
"Vitamin" # 6
)
),
subclass = factor(
subclass,
levels = c(
"Short-Chain Fatty Acid", # 1
"Other Fatty Acid", # 2
"Amino Acid", # 3
# "Aminated Fatty Acid",
# "Long-Chain Fatty Acid",
# "Dicarboxylic Acid",
"Primary Bile Acid", # 4
"Secondary Bile Acid", # 5
"Conjugated Bile Acid", # 6
"Tryptophan Metabs.", # 7
# "Phenolic Aromatic", #
# "Kynurine Pathway", #
"Vitamin", # 8
"Add. Cmpds." # 9
)
)) %>%
arrange(class, subclass, p.adj, compound) %>%
select(class, subclass, p.adj, compound) %>%
column_to_rownames(var = "compound") %>%
select(`Adjusted p-value` = p.adj) %>%
as.matrix() %>%
Heatmap(name = "Significance (adjusted p-value)",
cluster_rows = FALSE,
cluster_columns = FALSE,
col = pvalue_col_fun,
column_title = "KW Test",
column_title_side = "bottom",
column_title_rot = 90,
show_column_names = FALSE,
column_names_side = "top",
rect_gp = gpar(col = "black", lwd = 0.2),
row_gap = unit(3.5, "mm"),
row_names_side = "left",
row_order = heatmap_order$compound,
row_split = heatmap_order$subclass,
row_names_gp = gpar(fontsize = 19), # Compounds on y-axis
show_row_names = TRUE,
row_title_rot = 0,
row_title_gp = gpar(fontsize = 0),
heatmap_legend_param = list(at = seq(0.05, 0, -0.01)),
width = unit(0.1, "in")
)
# Heatmap legend color
col_fun <- colorRamp2(breaks = c(-5, 0, 5), colors = c("#00aaad", "white", "#ad003a"))
# Global parameter for annotation
# ht_opt$COLUMN_ANNO_PADDING <- unit(2.5, "mm")
gg_metab_heatmap <-
heatmap_data %>%
mutate(compound = str_to_title(compound),
compound = recode(compound,
Preq1 = "PreQ1")) %>%
left_join(heatmap_lookup) %>%
left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1) %>% select(compound, p.adj, p), by = "compound") %>%
mutate(class = case_when(class %in% c("Dicarboxylic Acid") ~ "Fatty Acid",
class %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
TRUE ~ class),
subclass = case_when(subclass %in% c("Branched-Chain Fatty Acid",
"Aminated Fatty Acid",
"Long-Chain Fatty Acid",
"Dicarboxylic Acid") ~ "Other Fatty Acid",
subclass == "Indole" ~ "Tryptophan Metabs.",
subclass %in% c("Phenolic Aromatic", "Kynurine Pathway") ~ "Add. Cmpds.",
TRUE ~ subclass
)) %>%
left_join(heatmap_pvals %>% group_by(compound, p.adj, p) %>% slice(1)) %>%
ungroup() %>%
mutate(class = factor(
class,
levels = c(
"Fatty Acid", # 1
# "Dicarboxylic Acid",
"Amino Acid", # 2
"Bile Acid", # 3
"Indole", # 4
"Add. Cmpds.", # 5
# "Phenolic Aromatic",
# "Kynurine Pathway",
"Vitamin" # 6
)
),
subclass = factor(
subclass,
levels = c(
"Short-Chain Fatty Acid", # 1
"Other Fatty Acid", # 2
"Amino Acid", # 3
# "Aminated Fatty Acid",
# "Long-Chain Fatty Acid",
# "Dicarboxylic Acid",
"Primary Bile Acid", # 4
"Secondary Bile Acid", # 5
"Conjugated Bile Acid", # 6
"Tryptophan Metabs.", # 7
# "Phenolic Aromatic", #
# "Kynurine Pathway", #
"Vitamin", # 8
"Add. Cmpds." # 9
)
)) %>%
mutate(
diversity_group_abv = as.character(diversity_group_abv),
diversity_group_abv = ifelse(
grepl(pattern = "Healthy", as.character(diversity_group_abv)),
diversity_group_abv,
paste(diversity_group_abv, "Diversity")
),
diversity_group_abv = factor(diversity_group_abv, levels = c("Low Diversity", "Medium Diversity",
"High Diversity", "Healthy Donor"))
) %>%
arrange(class, subclass, p.adj, compound) %>%
pivot_wider(c(diversity_group_abv, db, patientID, Shannon), names_from = compound, values_from = heatmap_val) %>%
group_by(patientID) %>%
arrange(patientID) %>%
ungroup() %>%
select(-`NA`) %>%
distinct(patientID, .keep_all = TRUE) %>%
group_by(diversity_group_abv) %>%
arrange(db, Shannon) %>%
ungroup() %>%
select(-Shannon, -db, -diversity_group_abv) %>%
column_to_rownames("patientID") %>%
as.matrix() %>%
t() %>%
Heatmap(
name = "Fold Change (log2)",
col = col_fun,
na_col = "grey83",
rect_gp = gpar(col = "grey40", lwd = 1.5),
# column_names_gp = grid::gpar(fontsize = 16),
column_gap = unit(2.5, "mm"),
column_split = heatmap_labels,
column_order = heatmap_column_order,
column_title_gp = gpar(fontsize = 20), # Diversity group labels on top of plot
column_title_rot = 0,
cluster_columns = FALSE,
show_column_names = FALSE,
show_column_dend = FALSE,
row_names_gp = gpar(fontsize = 16), # Compounds on y-axis
row_title_gp = gpar(fontsize = 18), # Compound classes on y-axis
row_title_side = "right", # Place the compound classes on right side of y-axis
row_gap = unit(3.5, "mm"),
row_names_side = c("left"),
row_order = heatmap_order$compound,
row_split = heatmap_order$subclass,
show_row_names = TRUE,
row_title_rot = 0,
cluster_rows = FALSE,
show_row_dend = FALSE,
heatmap_height = unit(24, "in"),
heatmap_width = unit(16.5, "in")
)
gg_metab_heatmap_tot <- pvalue_adj + gg_metab_heatmap
# gg_metab_heatmap_tot
pdf(file = "./Results/Figure_2.pdf", height = 24, width = 24, onefile = F)
gg_metab_heatmap_tot
invisible(dev.off())# Conversions of compounds
metab_conversions <- data.frame(
compound = c(
"Taurocholic Acid",
"Glycocholic Acid",
"Cholic Acid",
"3-Oxolithocholic Acid",
"Alloisolithocholic Acid",
"Deoxycholic Acid",
"Isodeoxycholic Acid",
"Lithocholic Acid",
"Kynurenic Acid",
"Kynurenine",
"Anthranilic Acid",
"Desaminotyrosine",
"Niacin",
"Tyrosine",
"Tryptamine",
"Tryptophan",
"Phenylalanine",
"Acetate",
"Butyrate",
"Propionate",
"Succinate"
),
molar_mass__gmol = c(
515.7,
465.6,
408.6,
374.6,
376.6,
392.6,
392.6,
376.6,
189.17,
208.21,
137.14,
166.17,
123.11,
181.19,
160.22,
204.22,
165.19,
59.04,
87.1,
73.07,
116.07
)
)
metab_quant_converted <- metab_quant_anon %>%
mutate(
compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)
) %>%
mutate(units = case_when(compound %in% c(
"Taurocholic Acid",
"Glycocholic Acid",
"Cholic Acid",
"3-Oxolithocholic Acid",
"Alloisolithocholic Acid",
"Deoxycholic Acid",
"Isodeoxycholic Acid",
"Lithocholic Acid"
) ~ "ugmL",
compound %in% c("Acetate", "Butyrate", "Succinate", "Propionate") ~ "mM",
TRUE ~ "uM")) %>%
right_join(metab_conversions, by = "compound") %>% # right_join to only keep compounds we're interested in
mutate(mvalue__mM = case_when(units == "ugmL" ~ (mvalue* #ugmL
((1000/ #1000 ml per L
1000000)/ #1000000 ug per gram
molar_mass__gmol)* #gram per mol (molar mass)
1000 #1000 mM per 1M
),
units == "uM" ~ mvalue/ #uM
1000, #1000uM per 1M
TRUE ~ mvalue #units are already in mM
)
) %>%
select(sampleID, compound, mvalue__mM)
metab_boxplot <-
metaphlan_df2 %>%
left_join(metaphlan_df_sumry) %>%
drop_na(taxid) %>%
select(sampleID, diversity_group_abv, db) %>%
group_by(sampleID) %>%
dplyr::slice(1) %>%
left_join(metab_quant_converted) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
ungroup() %>%
mutate(class = case_when(compound %in% c("Taurocholic Acid", "Glycocholic Acid") ~ "Conjugated Primary Bile Acid",
compound %in% c("Cholic Acid") ~ "Primary Bile Acid",
compound %in% c("3-Oxolithocholic Acid", "Alloisolithocholic Acid", "Deoxycholic Acid", "Isodeoxycholic Acid",
"Lithocholic Acid") ~ "Secondary Bile Acid",
compound %in% c("Threonine", "Glycine", "Tyrosine", "Tyramine", "Serine", "Leucine", "Isoleucine",
"Valine", "Phenylalanine", "Alanine", "Proline", "Aspartate",
"Methionine", "Glutamate", "Lysine", "Cysteine", "Tryptophan") ~ "Amino Acid",
compound %in% c("Acetate", "Butyrate", "Succinate", "Propionate") ~ "Short-Chain Fatty Acid",
compound %in% c("Kynurenic Acid", "Anthranilic Acid", "Kynurenine", "Tryptamine") ~ "Kynurenine Metabolite",
compound == "Desaminotyrosine" ~ "Phenolic Aromatic",
compound == "Niacin" ~ "B-Vitamin",
TRUE ~ "Indole"),
compound = case_when(class == "Conjugated Primary Bile Acid" ~ paste(compound, "(1ËšConj. BA)"),
class == "Primary Bile Acid" ~ paste(compound, "(1Ëš BA)"),
class == "Secondary Bile Acid" ~ paste(compound, "(2Ëš BA)"),
class == "Short-Chain Fatty Acid" ~ paste(compound, "(SCFA)"),
class == "Amino Acid" ~ paste(compound, "(AA)"),
class == "Phenolic Aromatic" ~ paste(compound, "(Phen. Arom.)"),
class == "Indole" ~ paste0(compound, "(Indole)"),
class == "Kynurenine Metabolite" ~ paste(compound, "(Kyn. Metab.)"),
class == "B-Vitamin" ~ paste(compound, "(B-Vitamin)")
)) %>%
drop_na() %>%
group_by(compound) %>%
mutate(count = length(unique(diversity_group_abv))) %>%
filter(count == 4) %>%
select(-count) %>%
mutate(compound = factor(
compound,
levels = c(
"Acetate (SCFA)", # 1
"Propionate (SCFA)", # 2
"Butyrate (SCFA)", # 3
"Succinate (SCFA)", # 4
"Tyrosine (AA)", # 5
"Tryptophan (AA)", # 6
"Phenylalanine (AA)", # 7
"Cholic Acid (1Ëš BA)", # 8
"Glycocholic Acid (1ËšConj. BA)", # 9
"Taurocholic Acid (1ËšConj. BA)", # 10
"Deoxycholic Acid (2Ëš BA)", # 11
"Lithocholic Acid (2Ëš BA)", # 12
"Isodeoxycholic Acid (2Ëš BA)", # 13
"3-Oxolithocholic Acid (2Ëš BA)", # 14
"Alloisolithocholic Acid (2Ëš BA)", # 15
"Desaminotyrosine (Phen. Arom.)", # 16
"Kynurenine (Kyn. Metab.)", # 17
"Anthranilic Acid (Kyn. Metab.)", # 18
"Tryptamine (Kyn. Metab.)", # 19
"Niacin (B-Vitamin)" # 20
)
),
class = factor(
class,
levels = c(
"Short-Chain Fatty Acid", # 1
"Amino Acid", # 2
"Primary Bile Acid", # 3
"Conjugated Primary Bile Acid", # 4
"Secondary Bile Acid", # 5
"Indole", # 6
"Phenolic Aromatic", # 7
"Kynurenine Metabolite", # 8
"B-Vitamin" # 9
)
)) %>%
filter(compound != "Kynurenic Acid") %>%
mutate(db = factor(db, levels = c("Liver Transplant", "Healthy Donor")),
diversity_group_abv = factor(diversity_group_abv, levels = c("Low", "Medium", "High", "Healthy Donor"))) %>%
filter(compound %in% c(
"Acetate (SCFA)",
"Butyrate (SCFA)",
"Propionate (SCFA)",
"Glycocholic Acid (1ËšConj. BA)",
"Taurocholic Acid (1ËšConj. BA)",
"Cholic Acid (1Ëš BA)",
"Deoxycholic Acid (2Ëš BA)",
"Lithocholic Acid (2Ëš BA)",
"Alloisolithocholic Acid (2Ëš BA)"
))
metab_boxplot_stats <-
metab_boxplot %>%
group_by(class, compound) %>%
rstatix::wilcox_test(mvalue__mM~diversity_group_abv,
comparisons = diversity_comps,
p.adjust.method = "none",
alternative= "two.sided") %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance("p.adj") %>%
mutate(p.adj = ifelse(p.adj < 0.001, "p.adj < 0.001", paste("p.adj = ", round(p.adj, 2)))) %>%
mutate(y.position = case_when(group1 == "High" & group2 == "Healthy Donor" ~ 2.30,
group1 == "Medium" & group2 == "High" ~ 2.7,
group1 == "Low" & group2 == "High" ~ 3.1,
group1 == "Low" & group2 == "Medium" ~ 3.5)) # Set stats brackets to fixed points since the y-scale will be log10 transformed
# Summary statistics
metab_boxplot_summay_stats <-
tbl_summary(
metab_boxplot %>%
ungroup() %>%
select(sampleID, diversity_group_abv, compound, mvalue__mM) %>%
pivot_wider(
id_cols = c(sampleID, diversity_group_abv),
names_from = "compound",
values_from = "mvalue__mM"
) %>% column_to_rownames(var = "sampleID"),
by = diversity_group_abv,
type = all_continuous() ~ "continuous2",
statistic = all_continuous() ~ c("{mean} ({sd})", "{median} ({p25}, {p75})", "{min}-{max}"),
digits = all_continuous() ~ function(x) format(x, digits = 3, scientific = TRUE)
) %>%
bold_labels() %>%
italicize_levels()
metab_boxplot_summay_stats %>%
gtsummary::modify_caption("**Diversity Group Summary Statistics**")| Characteristic | Low, N = 36 | Medium, N = 39 | High, N = 26 | Healthy Donor, N = 21 |
|---|---|---|---|---|
| Acetate (SCFA) | ||||
| Â Â Â Â Mean (SD) | 6.54e+00 (9.29e+00) | 2.12e+01 (2.46e+01) | 3.33e+01 (2.40e+01) | 4.82e+01 (1.85e+01) |
| Â Â Â Â Median (IQR) | 1.63e+00 (3.38e-01, 1.16e+01) | 1.29e+01 (2.02e+00, 4.14e+01) | 3.13e+01 (1.51e+01, 4.62e+01) | 4.93e+01 (2.95e+01, 6.31e+01) |
| Â Â Â Â Minimum-Maximum | 0.0e+00-4.81e+01 | 0.0e+00-1.14e+02 | 6.5e-01-8.62e+01 | 2.0e+01-7.61e+01 |
| Alloisolithocholic Acid (2Ëš BA) | ||||
| Â Â Â Â Mean (SD) | 5.90e-04 (7.63e-04) | 6.70e-04 (1.04e-03) | 6.51e-03 (1.08e-02) | 3.00e-02 (3.40e-02) |
| Â Â Â Â Median (IQR) | 2.79e-04 (0.00e+00, 7.97e-04) | 1.59e-04 (0.00e+00, 1.02e-03) | 1.18e-03 (0.00e+00, 6.52e-03) | 1.23e-02 (7.41e-03, 3.20e-02) |
| Â Â Â Â Minimum-Maximum | 0e+00-2.55e-03 | 0e+00-5.12e-03 | 0e+00-4.28e-02 | 0e+00-1.07e-01 |
| Butyrate (SCFA) | ||||
| Â Â Â Â Mean (SD) | 4.19e-01 (3.80e-01) | 2.36e+00 (3.22e+00) | 5.88e+00 (6.08e+00) | 1.53e+01 (1.01e+01) |
| Â Â Â Â Median (IQR) | 4.60e-01 (6.00e-02, 5.80e-01) | 8.30e-01 (3.05e-01, 3.25e+00) | 3.89e+00 (1.99e+00, 6.86e+00) | 1.24e+01 (8.87e+00, 2.34e+01) |
| Â Â Â Â Minimum-Maximum | 0.0e+00-1.28e+00 | 0.0e+00-1.22e+01 | 4.0e-02-2.66e+01 | 1.7e+00-3.85e+01 |
| Cholic Acid (1Ëš BA) | ||||
| Â Â Â Â Mean (SD) | 4.81e-01 (1.33e+00) | 8.09e-01 (1.80e+00) | 6.09e-01 (2.50e+00) | 2.08e-02 (4.12e-02) |
| Â Â Â Â Median (IQR) | 2.60e-02 (2.43e-03, 1.83e-01) | 1.23e-01 (2.13e-02, 7.06e-01) | 5.09e-03 (1.68e-03, 6.89e-02) | 5.41e-03 (2.14e-03, 1.21e-02) |
| Â Â Â Â Minimum-Maximum | 0.00e+00-7.49e+00 | 1.10e-03-9.18e+00 | 0.00e+00-1.27e+01 | 4.85e-04-1.67e-01 |
| Deoxycholic Acid (2Ëš BA) | ||||
| Â Â Â Â Mean (SD) | 6.61e-04 (7.86e-04) | 2.61e-02 (1.28e-01) | 2.52e-01 (3.31e-01) | 1.56e+00 (2.01e+00) |
| Â Â Â Â Median (IQR) | 3.53e-04 (3.82e-05, 1.13e-03) | 6.62e-04 (5.09e-05, 3.40e-03) | 1.01e-01 (4.12e-02, 2.63e-01) | 9.39e-01 (4.81e-01, 1.79e+00) |
| Â Â Â Â Minimum-Maximum | 0.00e+00-2.75e-03 | 0.00e+00-8.02e-01 | 3.33e-03-1.11e+00 | 1.01e-01-8.87e+00 |
| Glycocholic Acid (1ËšConj. BA) | ||||
| Â Â Â Â Mean (SD) | 1.39e-01 (5.16e-01) | 1.76e-01 (6.96e-01) | 1.42e-03 (3.08e-03) | 4.67e-03 (9.85e-03) |
| Â Â Â Â Median (IQR) | 4.51e-03 (1.66e-04, 2.43e-02) | 2.13e-03 (8.40e-05, 5.12e-02) | 2.04e-04 (0.00e+00, 1.50e-03) | 5.80e-04 (8.38e-05, 2.90e-03) |
| Â Â Â Â Minimum-Maximum | 0e+00-2.93e+00 | 0e+00-4.31e+00 | 0e+00-1.45e-02 | 0e+00-4.17e-02 |
| Lithocholic Acid (2Ëš BA) | ||||
| Â Â Â Â Mean (SD) | 3.73e-04 (6.01e-04) | 1.96e-02 (7.89e-02) | 4.21e-01 (5.52e-01) | 9.03e-01 (7.03e-01) |
| Â Â Â Â Median (IQR) | 0.00e+00 (0.00e+00, 5.58e-04) | 6.37e-04 (1.99e-04, 1.94e-03) | 1.46e-01 (4.75e-02, 6.92e-01) | 6.88e-01 (4.32e-01, 1.14e+00) |
| Â Â Â Â Minimum-Maximum | 0.00e+00-1.75e-03 | 0.00e+00-4.41e-01 | 2.37e-03-2.24e+00 | 1.72e-01-2.86e+00 |
| Propionate (SCFA) | ||||
| Â Â Â Â Mean (SD) | 6.58e-01 (1.02e+00) | 5.13e+00 (6.45e+00) | 1.07e+01 (8.24e+00) | 1.63e+01 (7.21e+00) |
| Â Â Â Â Median (IQR) | 4.35e-01 (1.38e-01, 6.20e-01) | 1.87e+00 (5.15e-01, 7.83e+00) | 9.50e+00 (3.95e+00, 1.70e+01) | 1.76e+01 (1.04e+01, 2.06e+01) |
| Â Â Â Â Minimum-Maximum | 0.00e+00-5.81e+00 | 0.00e+00-2.39e+01 | 8.00e-02-2.80e+01 | 2.96e+00-2.89e+01 |
| Taurocholic Acid (1ËšConj. BA) | ||||
| Â Â Â Â Mean (SD) | 5.62e-02 (9.73e-02) | 1.10e-01 (2.91e-01) | 1.05e-03 (2.16e-03) | 9.58e-04 (1.60e-03) |
| Â Â Â Â Median (IQR) | 9.42e-03 (2.48e-04, 6.03e-02) | 1.32e-03 (1.45e-04, 2.19e-02) | 2.91e-05 (0.00e+00, 5.43e-04) | 2.33e-04 (0.00e+00, 1.24e-03) |
| Â Â Â Â Minimum-Maximum | 0e+00-4.01e-01 | 0e+00-1.60e+00 | 0e+00-9.71e-03 | 0e+00-6.73e-03 |
gt::gtsave(gtsummary::as_gt(metab_boxplot_summay_stats), file = "./Results/Metab_Quant_Summary_Stats.png",
vwidth = 1500, vheight = 1000)
set.seed(123) # for consistent jittering of points
gg_metab_boxplot <-
ggboxplot(metab_boxplot,
x = "diversity_group_abv",
y = "mvalue__mM",
fill = "db",
color = "diversity_group_abv",
alpha = 0.65,
outlier.shape = NA,
facet.by = c("class", "compound")) +
theme(legend.text = et(size = 12, color = "black"),
legend.title = et(size = 14, color = "black"),
axis.title.x = eb(),
axis.title.y = et(size = 12, color = "black"),
panel.border = eb(),
strip.background = er(colour="white", fill="white"),
) +
geom_hline(yintercept = 0) +
geom_segment(aes(x = 0.35, y = 0, xend = 0.35, yend = Inf)) +
facet_wrap(~compound, scales = "fixed") +
stat_pvalue_manual(metab_boxplot_stats,
tip.length = 0.015) +
geom_point(
data = metab_boxplot,
aes(x = diversity_group_abv, y = mvalue__mM, color = diversity_group_abv),
position = position_jitter(width = 0.2),
size = 2,
alpha = 0.65) +
scale_fill_manual("Cohort", values = rev(pirate_colors)) +
scale_color_manual("Diversity Group", values = diversity_group_colors) +
scale_y_log10(limits = c(0.01, 5000),
labels = c(0.01, 0.1, 1, 10, 100, 1000),
breaks = c(0.01, 0.1, 1, 10, 100, 1000),
expand = expansion(mult = c(0.1, 0.2))) +
ylab("Concentration (mM)\n")
gg_metab_boxplotcairo_pdf(filename = "./Results/Supplemental_Figure_2.pdf", width = 14, height = 10, onefile = FALSE)
gg_metab_boxplot
invisible(dev.off())gg_ecoc_all_patients <- peri_matrix_all %>%
distinct(sampleID, .keep_all = T) %>%
select(sampleID, patientID, bact_infection_present) %>%
mutate(bact_infection_present = ifelse(bact_infection_present ==
"No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
levels = c("Infection", "No Infection"))) %>%
left_join(metaphlan_peri_anon %>%
select(-bact_infection_present) %>%
group_by(patientID, sampleID) %>%
filter(grepl(x = Species, pattern = "Enterococcus", ignore.case = T)) %>%
count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance")) %>%
left_join(metaphlan_peri_anon %>%
select(patientID, sampleID, Kingdom:pctseqs)) %>%
ungroup() %>%
arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-",
Genus)) %>%
group_by(sampleID) %>%
arrange(Genus) %>%
mutate(cum.pct = cumsum(pctseqs), y.text = (cum.pct + c(0,
cum.pct[-length(cum.pct)]))/2) %>%
ungroup() %>%
dplyr::select(-cum.pct) %>%
mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID),
Genus = factor(Genus, levels = unique(Genus)), Genus = fct_relevel(Genus,
"Firmicutes-Lactobacillales-Enterococcaceae-Enterococcus",
after = Inf)) %>%
arrange(-enterococcus_rel_abundance) %>%
filter(bact_infection_present != "No Infection") %>%
ggplot(aes(x = reorder(sampleID, -enterococcus_rel_abundance),
y = pctseqs)) + geom_bar(aes(fill = Genus), stat = "identity") +
theme_bw() + theme(legend.position = "none", axis.text.x = et(angle = 90,
hjust = 0.5), strip.text.x = eb(), strip.background = eb(),
axis.title.y = et(color = "black", size = 12), axis.text.y = et(color = "black",
size = 10)) + scale_fill_manual(values = metaphlan_pal2) +
scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
ylab("Shotgun Relative Abundance (%)\n") + xlab("") + facet_grid(~bact_infection_present,
space = "free_x", scales = "free_x")
# gg_ecoc_all_patients
# Discrete heatmap of infections
ecoc_infx_orgs <- peri_criteria_all %>%
filter(sampleID %in% peri_matrix_all$sampleID) %>%
group_by(patientID, eday) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(patientID, sampleID, bact_infection_present, infx_stool,
organism1, micro1.factor) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, sampleID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1) %>%
left_join(metaphlan_peri_anon %>%
select(-bact_infection_present) %>%
group_by(patientID, sampleID) %>%
filter(grepl(x = Species, pattern = "Enterococcus", ignore.case = T)) %>%
count(sampleID, wt = pctseqs, name = "enterococcus_rel_abundance")) %>%
left_join(metaphlan_peri_anon %>%
select(patientID, sampleID, Kingdom:pctseqs)) %>%
group_by(patientID, sampleID, organisms, org_presence) %>%
dplyr::slice(1) %>%
mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
mutate(org_presence = ifelse(grepl(pattern = "No", x = bact_infection_present,
ignore.case = T), 0, org_presence)) %>%
ungroup() %>%
mutate(organisms = ifelse(bact_infection_present == "Yes" &
org_presence == 0, "Other", organisms)) %>%
arrange(-enterococcus_rel_abundance) %>%
mutate(organisms = ifelse(grepl(x = organisms, pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
ignore.case = TRUE), organisms, "Other Bacterial Infection"),
organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection",
organisms))
ecoc_infx_orgs_order <- ecoc_infx_orgs %>%
group_by(sampleID, patientID, organisms) %>%
distinct(sampleID, patientID, .keep_all = T) %>%
ungroup() %>%
mutate(organisms = ifelse(grepl(organisms, pattern = "gallinarum"),
"Other Bacterial Infection", organisms), org_colors = case_when(grepl(x = organisms,
pattern = "enterococcus faecium", ignore.case = T) ~
1, grepl(x = organisms, pattern = "enterococcus faecalis",
ignore.case = T) ~ 2, grepl(x = organisms, pattern = "enterococcus avium",
ignore.case = T) ~ 3, grepl(x = organisms, pattern = "klebsiella pneumoniae",
ignore.case = T) ~ 4, grepl(x = organisms, pattern = "escherichia coli",
ignore.case = T) ~ 5, grepl(x = organisms, pattern = "proteus mirabilis",
ignore.case = T) ~ 6, grepl(x = organisms, pattern = "citrobacter freundii",
ignore.case = T) ~ 7, grepl(x = organisms, pattern = "other bacterial infection|gallinarum",
ignore.case = T) ~ 8, grepl(x = organisms, pattern = "culture negative",
ignore.case = T) ~ 9, TRUE ~ 0), organisms = as.factor(organisms),
organisms = factor(organisms, levels = c("Enterococcus faecium",
"Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
"Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
"Other Bacterial Infection", "Culture Negative",
"No Bacterial Infection")), org_colors = factor(org_colors,
levels = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "0"))) %>%
left_join(ecoc_infx_orgs) %>%
mutate(organisms = factor(organisms, levels = c("Enterococcus faecium",
"Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
"Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
"Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")),
org_colors = factor(org_colors, levels = c("1", "2",
"3", "4", "5", "6", "7", "8", "9", "0"))) %>%
drop_na(organisms) %>%
mutate(bact_infection_present = ifelse(bact_infection_present ==
"No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
levels = c("Infection", "No Infection"))) %>%
ungroup() %>%
filter(bact_infection_present == "Infection") %>%
droplevels()
gg_ecoc_infx_orgs <- ecoc_infx_orgs_order %>%
ggplot(., aes(x = reorder(sampleID, -enterococcus_rel_abundance),
y = organisms, fill = as.factor(org_colors))) + geom_tile(color = "black") +
theme_bw() + theme(axis.title.y = et(color = "black", size = 12),
axis.text.y = et(color = "black", size = 10), axis.ticks.x = eb(),
axis.text.x = eb(), axis.title.x = eb(), panel.grid = eb(),
strip.text = eb()) + scale_fill_manual(values = c("#129246",
"#0C7A3A", "#08592B", "#FF0000", "#CC0404", "#8A0202", "#5C0202",
"#E6C66E", "#BD992D", "#00000000"), labels = c("Enterococcus faecium",
"Enterococcus faecalis", "Enterococcus avium", "Klebsiella pneumoniae",
"Escherichia coli", "Citrobacter freundii", "Proteus mirabilis",
"Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")) +
labs(fill = "Infecting Organism", y = "Infecting Organism\n") +
scale_y_discrete(expand = expansion(mult = c(0.005, 0.005)),
limits = rev(levels(ecoc_infx_orgs_order$organisms))) +
facet_grid(. ~ bact_infection_present, space = "free_x",
scales = "free_x")
# gg_ecoc_infx_orgs
yingtools2::gg.stack(gg_ecoc_all_patients, gg_ecoc_infx_orgs,
heights = c(1, 0.5), adjust.themes = T)pdf(file = "./Results/Figure_5A.pdf", height = 8, width = 10,
onefile = F)
yingtools2::gg.stack(gg_ecoc_all_patients, gg_ecoc_infx_orgs,
heights = c(1, 0.5), adjust.themes = TRUE)
invisible(dev.off())
# Save object for combining later
fig_5a <- yingtools2::gg.stack(gg_ecoc_all_patients + theme(plot.margin = margin(t = 5,
r = 5, b = 5, l = 5)), gg_ecoc_infx_orgs + theme(legend.position = "none",
plot.margin = margin(t = 5, r = 5, b = 5, l = 5)), heights = c(1,
0.5), adjust.themes = TRUE, as.gtable = TRUE)gg_ebac_all_patients <- peri_matrix_all %>%
distinct(sampleID, .keep_all = T) %>%
select(sampleID, patientID, bact_infection_present) %>%
mutate(bact_infection_present = ifelse(bact_infection_present ==
"No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
levels = c("Infection", "No Infection"))) %>%
left_join(metaphlan_peri_anon %>%
select(-bact_infection_present) %>%
group_by(patientID, sampleID) %>%
filter(grepl(x = Species, pattern = "Klebsiella pneumoniae|Escherichia coli|Citrobacter freundii|Proteus mirabilis")) %>%
count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance")) %>%
left_join(metaphlan_peri_anon %>%
select(patientID, sampleID, Kingdom:pctseqs)) %>%
ungroup() %>%
arrange(Kingdom, Phylum, Class, Order, Family, Genus) %>%
mutate(Genus = paste0(Phylum, "-", Order, "-", Family, "-",
Genus)) %>%
group_by(sampleID) %>%
arrange(Genus) %>%
mutate(cum.pct = cumsum(pctseqs), y.text = (cum.pct + c(0,
cum.pct[-length(cum.pct)]))/2) %>%
ungroup() %>%
dplyr::select(-cum.pct) %>%
mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID),
Genus = factor(Genus, levels = unique(Genus)), Genus = fct_relevel(Genus,
c("Proteobacteria-Enterobacterales-Enterobacteriaceae-Citrobacter",
"Proteobacteria-Enterobacterales-Enterobacteriaceae-Enterobacter",
"Proteobacteria-Enterobacterales-Enterobacteriaceae-Escherichia",
"Proteobacteria-Enterobacterales-Enterobacteriaceae-Klebsiella",
"Proteobacteria-Enterobacterales-Morganellaceae-Proteus"),
after = Inf)) %>%
arrange(-enterobacterales_rel_abundance) %>%
filter(bact_infection_present == "Infection") %>%
ggplot(aes(x = reorder(sampleID, -enterobacterales_rel_abundance),
y = pctseqs)) + geom_bar(aes(fill = Genus), stat = "identity") +
theme_bw() + theme(legend.position = "none", axis.text.x = et(angle = 90,
hjust = 0.5), strip.text.x = eb(), strip.background = eb(),
axis.title.y = et(color = "black", size = 12), axis.text.y = et(color = "black",
size = 10)) + scale_fill_manual(values = metaphlan_pal2) +
scale_y_continuous(expand = expansion(mult = c(0.005, 0.005))) +
ylab("Shotgun Relative Abundance (%)\n") + xlab("") + facet_grid(~bact_infection_present,
space = "free_x", scales = "free_x")
# gg_ebac_all_patients
# Discrete heatmap of infections
ebac_infx_orgs <- peri_criteria_all %>%
filter(sampleID %in% peri_matrix_all$sampleID) %>%
group_by(patientID, eday) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(patientID, sampleID, bact_infection_present, infx_stool,
organism1, micro1.factor) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, sampleID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1) %>%
left_join(metaphlan_peri_anon %>%
select(-bact_infection_present) %>%
group_by(patientID, sampleID) %>%
filter(grepl(x = Species, pattern = "Klebsiella pneumoniae|Escherichia coli|Citrobacter freundii|Proteus mirabilis")) %>%
count(sampleID, wt = pctseqs, name = "enterobacterales_rel_abundance")) %>%
left_join(metaphlan_peri_anon %>%
select(patientID, sampleID, Kingdom:pctseqs)) %>%
group_by(patientID, sampleID, organisms, org_presence) %>%
dplyr::slice(1) %>%
mutate(sampleID = ifelse(is.na(sampleID), patientID, sampleID)) %>%
mutate(org_presence = ifelse(grepl(pattern = "No", x = bact_infection_present,
ignore.case = T), 0, org_presence)) %>%
ungroup() %>%
mutate(organisms = ifelse(bact_infection_present == "Yes" &
org_presence == 0, "Other", organisms)) %>%
arrange(-enterobacterales_rel_abundance) %>%
mutate(organisms = ifelse(grepl(x = organisms, pattern = "enterococcus|klebsiella|escherichia|proteus|citrobacter|culture",
ignore.case = TRUE), organisms, "Other Bacterial Infection"),
organisms = ifelse(bact_infection_present == "No", "No Bacterial Infection",
organisms))
ebac_infx_orgs_order <- ebac_infx_orgs %>%
group_by(sampleID, patientID, organisms) %>%
distinct(sampleID, patientID, .keep_all = T) %>%
ungroup() %>%
mutate(organisms = ifelse(grepl(organisms, pattern = "gallinarum"),
"Other Bacterial Infection", organisms), org_colors = case_when(grepl(x = organisms,
pattern = "enterococcus faecium", ignore.case = T) ~
1, grepl(x = organisms, pattern = "enterococcus faecalis",
ignore.case = T) ~ 2, grepl(x = organisms, pattern = "enterococcus avium",
ignore.case = T) ~ 3, grepl(x = organisms, pattern = "klebsiella pneumoniae",
ignore.case = T) ~ 4, grepl(x = organisms, pattern = "escherichia coli",
ignore.case = T) ~ 5, grepl(x = organisms, pattern = "proteus mirabilis",
ignore.case = T) ~ 6, grepl(x = organisms, pattern = "citrobacter freundii",
ignore.case = T) ~ 7, grepl(x = organisms, pattern = "other bacterial infection|gallinarum",
ignore.case = T) ~ 8, grepl(x = organisms, pattern = "culture negative",
ignore.case = T) ~ 9, TRUE ~ 0), organisms = as.factor(organisms),
organisms = factor(organisms, levels = c("Klebsiella pneumoniae",
"Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
"Enterococcus faecium", "Enterococcus faecalis",
"Enterococcus avium", "Other Bacterial Infection",
"Culture Negative", "No Bacterial Infection")), org_colors = factor(org_colors,
levels = c("4", "5", "6", "7", "1", "2", "3", "8",
"9", "0"))) %>%
left_join(ebac_infx_orgs) %>%
mutate(organisms = factor(organisms, levels = c("Klebsiella pneumoniae",
"Escherichia coli", "Proteus mirabilis", "Citrobacter freundii",
"Enterococcus faecium", "Enterococcus faecalis", "Enterococcus avium",
"Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")),
org_colors = factor(org_colors, levels = c("4", "5",
"6", "7", "1", "2", "3", "8", "9", "0"))) %>%
drop_na(organisms) %>%
mutate(bact_infection_present = ifelse(bact_infection_present ==
"No", "No Infection", "Infection"), bact_infection_present = factor(bact_infection_present,
levels = c("Infection", "No Infection"))) %>%
ungroup() %>%
filter(bact_infection_present == "Infection") %>%
droplevels()
gg_ebac_infx_orgs <- ebac_infx_orgs_order %>%
ggplot(., aes(x = reorder(sampleID, -enterobacterales_rel_abundance),
y = organisms, fill = as.factor(org_colors))) + geom_tile(color = "black") +
theme_bw() + theme(axis.title.y = et(color = "black", size = 12),
axis.text.y = et(color = "black", size = 10), axis.ticks.x = eb(),
axis.text.x = eb(), axis.title.x = eb(), panel.grid = eb(),
strip.text = eb()) + scale_fill_manual(values = c("#FF0000",
"#CC0404", "#8A0202", "#5C0202", "#129246", "#0C7A3A", "#08592B",
"#E6C66E", "#BD992D", "#00000000"), labels = c("Klebsiella pneumoniae",
"Escherichia coli", "Citrobacter freundii", "Proteus mirabilis",
"Enterococcus faecium", "Enterococcus faecalis", "Enterococcus avium",
"Other Bacterial Infection", "Culture Negative", "No Bacterial Infection")) +
labs(fill = "Infecting Organism", y = "Infecting Organism\n") +
scale_y_discrete(expand = expansion(mult = c(0.005, 0.005)),
limits = rev(levels(ebac_infx_orgs_order$organisms))) +
facet_grid(. ~ bact_infection_present, space = "free_x",
scales = "free_x")
# gg_ebac_infx_orgs
yingtools2::gg.stack(gg_ebac_all_patients, gg_ebac_infx_orgs,
heights = c(1, 0.5), adjust.themes = T)pdf(file = "./Results/Figure_5C.pdf", height = 8, width = 10,
onefile = F)
yingtools2::gg.stack(gg_ebac_all_patients, gg_ebac_infx_orgs,
heights = c(1, 0.5), adjust.themes = T)
invisible(dev.off())
# Save object for combining later
fig_5c <- yingtools2::gg.stack(gg_ebac_all_patients + theme(plot.margin = margin(t = 5,
r = 5, b = 5, l = 5)), gg_ebac_infx_orgs + theme(legend.position = "none",
plot.margin = margin(t = 5, r = 5, b = 5, l = 5)), heights = c(1,
0.5), adjust.themes = TRUE, as.gtable = TRUE)pdf(file = "./Results/Figure_5ABCD.pdf", height = 16, width = 16,
onefile = F)
cowplot::plot_grid(fig_5a, fig_5b + theme(plot.margin = margin(t = 5,
r = 5, b = 25, l = 5)), fig_5c, fig_5d + theme(plot.margin = margin(t = 5,
r = 5, b = 25, l = 5)), align = "h", axis = "bl", ncol = 2,
rel_widths = c(1, 0.9))
invisible(dev.off())# Qual compounds that we can quantify and that are also
# significant in the volcano analysis for both
signif_compounds <- qual_tot_ecoc_expan %>%
rownames_to_column(var = "compound") %>%
filter(p.adj <= 0.05 & abs(log2fc_val) > 0.75) %>%
distinct(compound)
metab_boxplot_ecoc_expan <- peri_matrix_all %>%
select(sampleID, enterococcus_rel_abundance) %>%
left_join(sample_lookup) %>%
group_by(sampleID) %>%
slice(1) %>%
left_join(metab_quant_converted %>%
filter(compound %in% signif_compounds$compound)) %>%
filter(compound %!in% c("Kynurenine", "Anthranilic Acid")) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate",
compound), compound = str_to_title(compound)) %>%
ungroup() %>%
mutate(db = ifelse(db == "HealthyDonor", "Healthy Donor",
"Liver Transplant") %>%
factor(., levels = c("Healthy Donor", "Liver Transplant")),
class = case_when(compound %in% c("Taurocholic Acid",
"Glycocholic Acid") ~ "Conjugated Primary Bile Acid",
compound %in% c("Cholic Acid") ~ "Primary Bile Acid",
compound %in% c("3-Oxolithocholic Acid", "Alloisolithocholic Acid",
"Deoxycholic Acid", "Isodeoxycholic Acid", "Lithocholic Acid") ~
"Secondary Bile Acid", compound %in% c("Threonine",
"Glycine", "Tyrosine", "Tyramine", "Serine",
"Leucine", "Isoleucine", "Valine", "Phenylalanine",
"Alanine", "Proline", "Aspartate", "Methionine",
"Glutamate", "Lysine", "Cysteine", "Tryptophan") ~
"Amino Acid", compound %in% c("Acetate", "Butyrate",
"Succinate", "Propionate") ~ "Short-Chain Fatty Acid",
compound %in% c("Kynurenic Acid", "Anthranilic Acid",
"Kynurenine", "Tryptamine") ~ "Kynurenine Metabolite",
compound == "Desaminotyrosine" ~ "Phenolic Aromatic",
compound == "Niacin" ~ "B-Vitamin", TRUE ~ "Indole")) %>%
drop_na() %>%
group_by(compound) %>%
mutate(class = factor(class, levels = c("Conjugated Primary Bile Acid",
"Primary Bile Acid", "Secondary Bile Acid", "Short-Chain Fatty Acid",
"Amino Acid", "Phenolic Aromatic", "Indole", "Kynurenine Metabolite",
"B-Vitamin")), compound = case_when(class == "Conjugated Primary Bile Acid" ~
paste(compound, "(1ËšConj. BA)"), class == "Primary Bile Acid" ~
paste(compound, "(1Ëš BA)"), class == "Secondary Bile Acid" ~
paste(compound, "(2Ëš BA)"), class == "Short-Chain Fatty Acid" ~
paste(compound, "(SCFA)"), class == "Amino Acid" ~ paste(compound,
"(AA)"), class == "Phenolic Aromatic" ~ paste(compound,
"(Phen. Arom.)"), class == "Indole" ~ paste0(compound,
"(Indole)"), class == "Kynurenine Metabolite" ~ paste(compound,
"(Kyn. Metab.)"), class == "B-Vitamin" ~ paste(compound,
"(B-Vitamin)")), compound = factor(compound, levels = c("Acetate (SCFA)",
"Butyrate (SCFA)", "Propionate (SCFA)", "Succinate (SCFA)",
"Taurocholic Acid (1ËšConj. BA)", "Glycocholic Acid (1ËšConj. BA)",
"Cholic Acid (1Ëš BA)", "3-Oxolithocholic Acid (2Ëš BA)",
"Alloisolithocholic Acid (2Ëš BA)", "Deoxycholic Acid (2Ëš BA)",
"Isodeoxycholic Acid (2Ëš BA)", "Lithocholic Acid (2Ëš BA)",
"Kynurenic Acid (Kyn. Metab.)", "Kynurenine (Kyn. Metab.)",
"Anthranilic Acid (Kyn. Metab.)", "Desaminotyrosine",
"Niacin", "Tyrosine", "Tryptamine", "Tryptophan", "Phenylalanine"))) %>%
ungroup() %>%
drop_na() %>%
mutate(enterococcus_expansion = ifelse(enterococcus_rel_abundance >=
optimal_cutpoint_rel$optimal_cutpoint[2], 1, 0)) %>%
arrange(enterococcus_expansion, sampleID) %>%
group_by(compound) %>%
mutate(enterococcus_expansion_0 = length(mvalue__mM[enterococcus_expansion ==
"0"]), enterococcus_expansion_1 = length(mvalue__mM[enterococcus_expansion ==
"1"])) %>%
filter(any(mvalue__mM != 0)) %>%
mutate(enterococcus_expansion = ifelse(enterococcus_expansion ==
"1", "Expansion", "No Expansion"))
metab_boxplot_summary_stats_ecoc_expan <- metab_boxplot_ecoc_expan %>%
group_by(compound) %>%
summarise(y.position = max(mvalue__mM) * 1.1)
metab_boxplot_stats_ecoc_expan <- metab_boxplot_ecoc_expan %>%
group_by(class, compound) %>%
rstatix::wilcox_test(mvalue__mM ~ enterococcus_expansion) %>%
rstatix::adjust_pvalue(method = "BH") %>%
rstatix::add_significance("p.adj") %>%
mutate(p.adj = ifelse(p.adj < 0.001, "p.adj < 0.001", paste("p.adj = ",
round(p.adj, 3)))) %>%
add_xy_position(x = "enterococcus_expansion") %>%
select(-y.position) %>%
left_join(metab_boxplot_summary_stats_ecoc_expan)
# gg_metab_boxplot_ecoc_expan <-
# ggboxplot(metab_boxplot_ecoc_expan, x =
# 'enterococcus_expansion', y = 'mvalue__mM', fill =
# 'enterococcus_expansion', alpha = 0.65, outlier.shape =
# NA ) + theme(legend.text = et(size = 14, color =
# 'black'), legend.title = et(size = 14, color = 'black'),
# axis.title.x = eb(), axis.title.y = et(size = 16, color =
# 'black'), strip.text = et(size = 14, color = 'black'),
# strip.background = eb(), panel.border = eb(), axis.line =
# eb()) + facet_wrap(~compound, scales = 'free_y', nrow =
# 2) + annotate('segment', x = 0.35, xend = 0.35, y = 0,
# yend = Inf, colour = 'black', linewidth = 1) +
# annotate('segment', x = 0.35, xend = Inf, y = 0, yend =
# 0, colour = 'black') +
# stat_pvalue_manual(metab_boxplot_stats_ecoc_expan, label
# = '{p.adj}', bracket.size = 0.5) + geom_point(data =
# metab_boxplot_ecoc_expan, aes(x = enterococcus_expansion,
# y = mvalue__mM, fill = enterococcus_expansion), position
# = position_jitter(width = 0.2), size = 2, shape = 21,
# alpha = 0.65, color = 'black') +
# scale_fill_manual('Enterococcus', values = c('gray87',
# '#389458')) + scale_y_continuous(expand = expansion(mult
# = c(0.1, 0.2))) + ylab('Concentration (mM)\n')
# gg_metab_boxplot_ecoc_expan cairo_pdf(filename =
# './Results/Figure_5B.pdf', height = 8, width = 16,
# onefile = TRUE) gg_metab_boxplot_ecoc_expan
# invisible(dev.off())diversity_metab_mat <-
metab_qual_anon %>% filter(sampleID %in% first_samps$sampleID) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>%
group_by(sampleID, compound, diversity_group_abv) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue, diversity_group_abv) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`, - diversity_group_abv) %>%
filter_all(any_vars(!is.na(.)))
diversity_metab_labs <-
diversity_metab_mat %>%
rownames_to_column(var = "sampleID") %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group_abv)) %>%
droplevels() %>%
pull(diversity_group_abv)
dim(diversity_metab_mat) #101 93 (means 93 compounds and 101 LT patients/infections)## [1] 101 93
length(diversity_metab_labs) #101 (means 101 LT patients/infections)## [1] 101
# Begin model training
set.seed(1234)
diversity_train <- sample(1:nrow(diversity_metab_mat), as.integer(0.7*nrow(diversity_metab_mat))) # randomly select 70% samples in training
diversity_test <- setdiff(1:nrow(diversity_metab_mat), diversity_train) # rest is part of the test set
# store matrices into training and test set:
diversity_metab_mat.train <- diversity_metab_mat[diversity_train, ]
diversity_metab_mat.test <- diversity_metab_mat[diversity_test,]
diversity_metab_labs.train <- diversity_metab_labs[diversity_train]
diversity_metab_labs.test <- diversity_metab_labs[diversity_test]
# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
diversity_train_splsda <- mixOmics::splsda(diversity_metab_mat.train, diversity_metab_labs.train, ncomp = 5)
# Performance assessment
## 5-fold, 50-repeat cross validation
set.seed(1234)
diversity_train_plsda_perf <-
perf(
diversity_train_splsda,
validation = "Mfold",
folds = 5,
progressBar = FALSE,
auc = TRUE,
nrepeat = 50
)
plot(
diversity_train_plsda_perf,
col = color.mixo(5:7),
sd = FALSE,
auc = TRUE,
legend.position = "horizontal"
) # ncomp = 4 might be best for classification error rate and max.dist# Number of optimal variables to select for each component
diversity_train_keepX <- c(1:10, seq(20, 130, 10))
set.seed(123)
diversity_train_tune_splsda <-
mixOmics::tune.splsda(
diversity_metab_mat.train,
diversity_metab_labs.train,
ncomp = 4, # Choose 4 components (max) to be safe
validation = 'Mfold',
folds = 5,
dist = 'max.dist',
progressBar = FALSE,
auc = TRUE,
measure = "BER",
test.keepX = diversity_train_keepX,
nrepeat = 50
)
plot(diversity_train_tune_splsda, col = color.jet(4))diversity_train_error <- diversity_train_tune_splsda$error.rate
diversity_train_ncomp <- diversity_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
diversity_train_ncomp #1 component is optimal## [1] 1
diversity_train_select_keepX <- diversity_train_tune_splsda$choice.keepX[1:ifelse(diversity_train_ncomp == 1, diversity_train_ncomp + 1, diversity_train_ncomp)] # optimal number of variables to select per component
diversity_train_select_keepX## comp1 comp2
## 80 8
# Final Model
diversity_train_splsda_final <-
mixOmics::splsda(diversity_metab_mat.train, diversity_metab_labs.train, ncomp = ifelse(diversity_train_ncomp == 1, diversity_train_ncomp + 1, diversity_train_ncomp), keepX = diversity_train_select_keepX)
# Test the model
diversity_predict_train_splsda_final <- predict(diversity_train_splsda_final, diversity_metab_mat.test,
dist = "max.dist")
diversity_predict_train_comp2 <- diversity_predict_train_splsda_final$class$max.dist[,ifelse(diversity_train_ncomp == 1, diversity_train_ncomp + 1, diversity_train_ncomp)]
diversity_union <- union(diversity_predict_train_comp2, diversity_metab_labs.test)
confusionMatrix(table(factor(diversity_predict_train_comp2, diversity_union,
levels = c("Low", "Medium", "High")),
factor(diversity_metab_labs.test, diversity_union,
levels = c("Low", "Medium", "High"))))## Confusion Matrix and Statistics
##
##
## Medium High Low
## Medium 10 4 0
## High 3 6 3
## Low 0 0 5
##
## Overall Statistics
##
## Accuracy : 0.6774
## 95% CI : (0.4863, 0.8332)
## No Information Rate : 0.4194
## P-Value [Acc > NIR] : 0.003321
##
## Kappa : 0.4992
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Medium Class: High Class: Low
## Sensitivity 0.7692 0.6000 0.6250
## Specificity 0.7778 0.7143 1.0000
## Pos Pred Value 0.7143 0.5000 1.0000
## Neg Pred Value 0.8235 0.7895 0.8846
## Prevalence 0.4194 0.3226 0.2581
## Detection Rate 0.3226 0.1935 0.1613
## Detection Prevalence 0.4516 0.3871 0.1613
## Balanced Accuracy 0.7735 0.6571 0.8125
diversity_train_background <- background.predict(diversity_train_splsda_final,
comp.predicted = 2,
xlim = c(-20,20),
ylim = c(-20,20),
dist = "centroids.dist")
# Model metrics for all samples
diversity_tot <- predict(diversity_train_splsda_final,
diversity_metab_mat,
dist = "max.dist")
diversity_tot_predict <- diversity_tot$class$max.dist[,ifelse(diversity_train_ncomp == 1, diversity_train_ncomp + 1, diversity_train_ncomp)]
diversity_tot_union <- union(diversity_tot_predict, diversity_metab_labs)
diversity_cm <- confusionMatrix(table(factor(diversity_tot_predict, diversity_tot_union,
levels = c("Low", "Medium", "High")),
factor(diversity_metab_labs, diversity_tot_union,
levels = c("Low", "Medium", "High"))),
)
diversity_cm## Confusion Matrix and Statistics
##
##
## Medium Low High
## Medium 30 11 2
## Low 6 27 6
## High 0 1 18
##
## Overall Statistics
##
## Accuracy : 0.7426
## 95% CI : (0.646, 0.8244)
## No Information Rate : 0.3861
## P-Value [Acc > NIR] : 3.744e-13
##
## Kappa : 0.6044
##
## Mcnemar's Test P-Value : 0.07057
##
## Statistics by Class:
##
## Class: Medium Class: Low Class: High
## Sensitivity 0.8333 0.6923 0.6923
## Specificity 0.8000 0.8065 0.9867
## Pos Pred Value 0.6977 0.6923 0.9474
## Neg Pred Value 0.8966 0.8065 0.9024
## Prevalence 0.3564 0.3861 0.2574
## Detection Rate 0.2970 0.2673 0.1782
## Detection Prevalence 0.4257 0.3861 0.1881
## Balanced Accuracy 0.8167 0.7494 0.8395
# Additional model measures
diversity_epi <- mltest::ml_test(predicted = factor(diversity_tot_predict, levels = c("Low", "Medium", "High")),
true = factor(diversity_metab_labs, levels = c("Low", "Medium", "High")))
diversity_cm_names <- diversity_cm$table
colnames(diversity_cm_names) <- c("Actual\nLow", "Actual\nMedium", "Actual\nHigh")
rownames(diversity_cm_names) <- c("Predicted\nLow", "Predicted\nMedium", "Predicted\nHigh")
diversity_confusion_df <- diversity_cm_names %>%
t()
{
pdf(file = "./Results/Figure_3A.pdf", height = 10, width = 10)
plotIndiv(
diversity_train_splsda_final,
xlim = c(min(diversity_train_splsda_final$variates$X[,1])*1.05, max(diversity_train_splsda_final$variates$X[,1])*1.8),
ylim = c(min(diversity_train_splsda_final$variates$X[,2])*1.15, max(diversity_train_splsda_final$variates$X[,2])*1.8),
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = diversity_train_background,
col = c("#3A001E", "#8A0246", "#C20463"),
star = TRUE,
point.lwd = 0.5,
title = NULL,
size.title = 0.00001,
style = "graphics",
legend.title = "Diversity Group",
X.label = paste0("Component 1 (", round(diversity_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(diversity_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = -5.25,
x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
diversity_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = -5.5,
x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
paste0("Overall ACC = ",
paste0(formatC(round(diversity_cm$overall[1], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(diversity_cm$overall[3], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(diversity_cm$overall[4], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = 3,
x = 1.5,
paste0("Low Diversity ACC = ", round(diversity_epi$balanced.accuracy[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 2.7,
x = 1.5,
paste0("Low Diversity Sens. = ", round(diversity_epi$precision[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 2.4,
x = 1.5,
paste0("Low Diversity Spec. = ", round(diversity_epi$specificity[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
# text(
# y = 2.1,
# x = 1.5,
# paste0("Low Diversity OR = ", round(diversity_epi$DOR[1], 1)),
# cex = 0.75, adj = 0, col = "#3A001E")
text(
y = -4.7,
x = 0,
paste0("Medium Diversity ACC = ", round(diversity_epi$balanced.accuracy[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = -5,
x = 0,
paste0("Medium Diversity Sens. = ", round(diversity_epi$precision[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = -5.3,
x = 0,
paste0("Medium Diversity Spec. = ", round(diversity_epi$specificity[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
# text(
# y = -5.9,
# x = 1.5,
# paste0("Medium Diversity OR = ", round(diversity_epi$DOR[2], 1)),
# cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 3,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity ACC = ", round(diversity_epi$balanced.accuracy[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 2.7,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity Sens. = ", round(diversity_epi$precision[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 2.4,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity Spec. = ", round(diversity_epi$specificity[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
# text(
# y = 2.1,
# x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
# paste0("High Diversity OR = ", round(diversity_epi$DOR[3], 1)),
# cex = 0.75, adj = 0, col = "#C20463")
invisible(dev.off())
}
plotIndiv(
diversity_train_splsda_final,
xlim = c(min(diversity_train_splsda_final$variates$X[,1])*1.05, max(diversity_train_splsda_final$variates$X[,1])*1.8),
ylim = c(min(diversity_train_splsda_final$variates$X[,2])*1.15, max(diversity_train_splsda_final$variates$X[,2])*1.8),
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = diversity_train_background,
col = c("#3A001E", "#8A0246", "#C20463"),
star = TRUE,
point.lwd = 0.5,
title = NULL,
size.title = 0.00001,
style = "graphics",
legend.title = "Diversity Group",
X.label = paste0("Component 1 (", round(diversity_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(diversity_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = -5.25,
x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
diversity_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = -5.5,
x = min(diversity_train_splsda_final$variates$X[,1])*1.1,
paste0("Overall ACC = ",
paste0(formatC(round(diversity_cm$overall[1], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(diversity_cm$overall[3], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(diversity_cm$overall[4], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = 3,
x = 1.5,
paste0("Low Diversity ACC = ", round(diversity_epi$balanced.accuracy[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 2.7,
x = 1.5,
paste0("Low Diversity Sens. = ", round(diversity_epi$precision[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
text(
y = 2.4,
x = 1.5,
paste0("Low Diversity Spec. = ", round(diversity_epi$specificity[1]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#3A001E")
# text(
# y = 2.1,
# x = 1.5,
# paste0("Low Diversity OR = ", round(diversity_epi$DOR[1], 1)),
# cex = 0.75, adj = 0, col = "#3A001E")
text(
y = -4.7,
x = 0,
paste0("Medium Diversity ACC = ", round(diversity_epi$balanced.accuracy[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = -5,
x = 0,
paste0("Medium Diversity Sens. = ", round(diversity_epi$precision[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
text(
y = -5.3,
x = 0,
paste0("Medium Diversity Spec. = ", round(diversity_epi$specificity[2]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#8A0246")
# text(
# y = -5.9,
# x = 1.5,
# paste0("Medium Diversity OR = ", round(diversity_epi$DOR[2], 1)),
# cex = 0.75, adj = 0, col = "#8A0246")
text(
y = 3,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity ACC = ", round(diversity_epi$balanced.accuracy[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 2.7,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity Sens. = ", round(diversity_epi$precision[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")
text(
y = 2.4,
x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
paste0("High Diversity Spec. = ", round(diversity_epi$specificity[3]*100, 1), "%"),
cex = 0.75, adj = 0, col = "#C20463")# text(
# y = 2.1,
# x = min(diversity_train_splsda_final$variates$X[,1])*1.05,
# paste0("High Diversity OR = ", round(diversity_epi$DOR[3], 1)),
# cex = 0.75, adj = 0, col = "#C20463")ecoc_doms_metab_mat <-
metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
group_by(sampleID, compound) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
right_join(peri_matrix_all %>%
mutate(domination = case_when(enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] #|
# enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1]
~ "Expansion",
TRUE ~ "No Expansion")) %>%
select(sampleID, domination)) %>%
select(-domination) %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`) %>%
filter_all(any_vars(!is.na(.)))
ecoc_doms_metab_labs <-
ecoc_doms_metab_mat %>%
rownames_to_column(var = "sampleID") %>%
left_join(peri_matrix_all %>%
mutate(domination = case_when(enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] #|
# enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1]
~ "Expansion",
TRUE ~ "No Expansion")) %>%
select(sampleID, domination)) %>%
pull(domination)
dim(ecoc_doms_metab_mat) #107 93 (means 93 compounds and 108 LT patients)## [1] 107 93
length(ecoc_doms_metab_labs) #107 (means 108 LT patients)## [1] 107
# Begin model training
set.seed(1234)
ecoc_doms_train <- sample(1:nrow(ecoc_doms_metab_mat), as.integer(0.7*nrow(ecoc_doms_metab_mat))) # randomly select 70% samples in training
ecoc_doms_test <- setdiff(1:nrow(ecoc_doms_metab_mat), ecoc_doms_train) # rest is part of the test set
# store matrices into training and test set:
ecoc_doms_metab_mat.train <- ecoc_doms_metab_mat[ecoc_doms_train, ]
ecoc_doms_metab_mat.test <- ecoc_doms_metab_mat[ecoc_doms_test,]
ecoc_doms_metab_labs.train <- ecoc_doms_metab_labs[ecoc_doms_train]
ecoc_doms_metab_labs.test <- ecoc_doms_metab_labs[ecoc_doms_test]
# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
ecoc_doms_train_splsda <- mixOmics::splsda(ecoc_doms_metab_mat.train, ecoc_doms_metab_labs.train, ncomp = 5)
# Performance assessment
## 5-fold, 50-repeat cross validation
set.seed(1234)
ecoc_doms_train_plsda_perf <-
perf(
ecoc_doms_train_splsda,
validation = "Mfold",
folds = 5,
progressBar = FALSE,
auc = TRUE,
nrepeat = 50
)
plot(
ecoc_doms_train_plsda_perf,
col = color.mixo(5:7),
sd = FALSE,
auc = TRUE,
legend.position = "horizontal"
) # ncomp = 1 or 4 is best for classification error rate and max.dist# Number of optimal variables to select for each component
ecoc_doms_train_keepX <- c(1:10, seq(20, 108, 10))
set.seed(123)
ecoc_doms_train_tune_splsda <-
mixOmics::tune.splsda(
ecoc_doms_metab_mat.train,
ecoc_doms_metab_labs.train,
ncomp = 4, # Choose 4 components (max) to be safe
validation = 'Mfold',
folds = 5,
dist = 'max.dist',
progressBar = FALSE,
auc = TRUE,
measure = "BER",
test.keepX = ecoc_doms_train_keepX,
nrepeat = 50
)
plot(ecoc_doms_train_tune_splsda, col = color.jet(4))ecoc_doms_train_error <- ecoc_doms_train_tune_splsda$error.rate
ecoc_doms_train_ncomp <- ecoc_doms_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
# ecoc_doms_train_ncomp = 4 #4 components are optimal via visual inspection
ecoc_doms_train_select_keepX <- ecoc_doms_train_tune_splsda$choice.keepX[1:ifelse(ecoc_doms_train_ncomp == 1, ecoc_doms_train_ncomp + 1, ecoc_doms_train_ncomp)] # optimal number of variables to select per component
ecoc_doms_train_select_keepX## comp1 comp2
## 1 20
# Final Model
ecoc_doms_train_splsda_final <-
mixOmics::splsda(ecoc_doms_metab_mat.train, ecoc_doms_metab_labs.train, ncomp = ifelse(ecoc_doms_train_ncomp == 1, ecoc_doms_train_ncomp + 1, ecoc_doms_train_ncomp), keepX = ecoc_doms_train_select_keepX)
# Test the model
ecoc_doms_predict_train_splsda_final <- predict(ecoc_doms_train_splsda_final, ecoc_doms_metab_mat.test,
dist = "max.dist")
ecoc_doms_predict_train_comp2 <- ecoc_doms_predict_train_splsda_final$class$max.dist[,ifelse(ecoc_doms_train_ncomp == 1, ecoc_doms_train_ncomp + 1, ecoc_doms_train_ncomp)]
ecoc_doms_union <- union(ecoc_doms_predict_train_comp2, ecoc_doms_metab_labs.test)
confusionMatrix(table(factor(ecoc_doms_predict_train_comp2, ecoc_doms_union),
factor(ecoc_doms_metab_labs.test, ecoc_doms_union)),
negative = "0")## Confusion Matrix and Statistics
##
##
## No Expansion Expansion
## No Expansion 16 5
## Expansion 3 9
##
## Accuracy : 0.7576
## 95% CI : (0.5774, 0.8891)
## No Information Rate : 0.5758
## P-Value [Acc > NIR] : 0.02392
##
## Kappa : 0.4943
##
## Mcnemar's Test P-Value : 0.72367
##
## Sensitivity : 0.8421
## Specificity : 0.6429
## Pos Pred Value : 0.7619
## Neg Pred Value : 0.7500
## Prevalence : 0.5758
## Detection Rate : 0.4848
## Detection Prevalence : 0.6364
## Balanced Accuracy : 0.7425
##
## 'Positive' Class : No Expansion
##
ecoc_doms_train_background <- background.predict(ecoc_doms_train_splsda_final,
comp.predicted = 2,
xlim = c(-20,20),
ylim = c(-20,20),
dist = "centroids.dist")
# Model metrics for all samples
ecoc_doms_tot <- predict(ecoc_doms_train_splsda_final,
ecoc_doms_metab_mat,
dist = "max.dist")
ecoc_doms_tot_predict <- ecoc_doms_tot$class$max.dist[,ifelse(ecoc_doms_train_ncomp == 1, ecoc_doms_train_ncomp + 1, ecoc_doms_train_ncomp)]
ecoc_doms_tot_union <- union(ecoc_doms_tot_predict, ecoc_doms_metab_labs)
ecoc_doms_cm <- confusionMatrix(table(factor(ecoc_doms_tot_predict, ecoc_doms_tot_union,
levels = c("No Expansion","Expansion")),
factor(ecoc_doms_metab_labs, ecoc_doms_tot_union,
levels = c("No Expansion","Expansion"))),
positive = "Expansion")
ecoc_doms_cm## Confusion Matrix and Statistics
##
##
## No Expansion Expansion
## No Expansion 59 14
## Expansion 7 27
##
## Accuracy : 0.8037
## 95% CI : (0.7158, 0.8742)
## No Information Rate : 0.6168
## P-Value [Acc > NIR] : 2.546e-05
##
## Kappa : 0.5709
##
## Mcnemar's Test P-Value : 0.1904
##
## Sensitivity : 0.6585
## Specificity : 0.8939
## Pos Pred Value : 0.7941
## Neg Pred Value : 0.8082
## Prevalence : 0.3832
## Detection Rate : 0.2523
## Detection Prevalence : 0.3178
## Balanced Accuracy : 0.7762
##
## 'Positive' Class : Expansion
##
# Additional model measures
ecoc_doms_epi <- epiR::epi.tests(table(ecoc_doms_tot_predict, ecoc_doms_metab_labs), conf.level = 0.95)
ecoc_doms_confusion_df <- ecoc_doms_epi$tab %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = "actual") %>%
mutate(actual = case_when(grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nExpansion",
grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Expansion",
TRUE ~ "Total")) %>%
dplyr::rename("Predicted\nExpansion" = "Test +",
"Predicted\nNo Expansion" = "Test -") %>%
column_to_rownames(var = "actual")
{
pdf(file = "./Results/Figure_6A.pdf", height = 10, width = 10)
plotIndiv(
ecoc_doms_train_splsda_final,
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = ecoc_doms_train_background,
col = c("#0C7A3A", "black"),
star = TRUE,
point.lwd = 0.5,
title = NULL,
size.title = 0.00001,
style = "graphics",
legend.title = "Expansion",
X.label = paste0("Component 1 (", round(ecoc_doms_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(ecoc_doms_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*1.05,
x = min(ecoc_doms_train_splsda_final$variates$X[,1])*1.15,
ecoc_doms_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = -3.5,
x = 1.5 ,
substitute(bold("Expansion")), cex = 1.75, adj = 0, col = "#0C7A3A")
text(
y = 3 ,
x = 0.75,
substitute(bold("No Expansion")), cex = 1.75, adj = 0, col = "black")
text(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*0.825,
x = max(ecoc_doms_train_splsda_final$variates$X[,1])*0.25,
paste0("ACC = ",
round(ecoc_doms_cm$overall[1]*100, 1), "% [",
round(ecoc_doms_cm$overall[3]*100, 1), "%, ",
round(ecoc_doms_cm$overall[4]*100, 1), "%]"),
cex = 0.75, adj = 0)
text(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*0.875,
x = max(ecoc_doms_train_splsda_final$variates$X[,1])*0.25,
paste0("Sens. = ",
paste0(formatC(round(ecoc_doms_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(ecoc_doms_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(ecoc_doms_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*0.925,
x = max(ecoc_doms_train_splsda_final$variates$X[,1])*0.25,
paste0("Spec. = ",
paste0(formatC(round(ecoc_doms_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(ecoc_doms_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(ecoc_doms_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*0.975,
x = max(ecoc_doms_train_splsda_final$variates$X[,1])*0.25,
paste0("OR = ",
formatC(round(ecoc_doms_epi$detail[2][6,], 3), digits = 1, format = "f"), " [",
formatC(round(ecoc_doms_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
formatC(round(ecoc_doms_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
cex = 0.75, adj = 0)
invisible(dev.off())
}
plotIndiv(
ecoc_doms_train_splsda_final,
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = ecoc_doms_train_background,
col = c("#0C7A3A", "black"),
star = TRUE,
point.lwd = 0.5,
title = NULL,
size.title = 0.00001,
style = "graphics",
legend.title = "Expansion",
X.label = paste0("Component 1 (", round(ecoc_doms_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(ecoc_doms_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*1.05,
x = min(ecoc_doms_train_splsda_final$variates$X[,1])*1.15,
ecoc_doms_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = -3.5,
x = 1.5 ,
substitute(bold("Expansion")), cex = 1.75, adj = 0, col = "#0C7A3A")
text(
y = 3 ,
x = 0.75,
substitute(bold("No Expansion")), cex = 1.75, adj = 0, col = "black")
text(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*0.825,
x = max(ecoc_doms_train_splsda_final$variates$X[,1])*0.25,
paste0("ACC = ",
round(ecoc_doms_cm$overall[1]*100, 1), "% [",
round(ecoc_doms_cm$overall[3]*100, 1), "%, ",
round(ecoc_doms_cm$overall[4]*100, 1), "%]"),
cex = 0.75, adj = 0)
text(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*0.875,
x = max(ecoc_doms_train_splsda_final$variates$X[,1])*0.25,
paste0("Sens. = ",
paste0(formatC(round(ecoc_doms_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(ecoc_doms_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(ecoc_doms_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*0.925,
x = max(ecoc_doms_train_splsda_final$variates$X[,1])*0.25,
paste0("Spec. = ",
paste0(formatC(round(ecoc_doms_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(ecoc_doms_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(ecoc_doms_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = min(ecoc_doms_train_splsda_final$variates$X[,2])*0.975,
x = max(ecoc_doms_train_splsda_final$variates$X[,1])*0.25,
paste0("OR = ",
formatC(round(ecoc_doms_epi$detail[2][6,], 3), digits = 1, format = "f"), " [",
formatC(round(ecoc_doms_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
formatC(round(ecoc_doms_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
cex = 0.75, adj = 0)ebac_doms_metab_mat <-
metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
group_by(sampleID, compound) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
right_join(peri_matrix_all %>%
mutate(domination = case_when(
#enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] #|
enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1]
~ "Expansion",
TRUE ~ "No Expansion")) %>%
select(sampleID, domination)) %>%
select(-domination) %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`) %>%
filter_all(any_vars(!is.na(.)))
ebac_doms_metab_labs <-
ebac_doms_metab_mat %>%
rownames_to_column(var = "sampleID") %>%
left_join(peri_matrix_all %>%
mutate(domination = case_when(
# enterococcus_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[2] #|
enterobacterales_rel_abundance >= optimal_cutpoint_rel$optimal_cutpoint[1]
~ "Expansion",
TRUE ~ "No Expansion")) %>%
select(sampleID, domination)) %>%
pull(domination)
dim(ebac_doms_metab_mat) #107 93 (means 93 compounds and 108 LT patients)## [1] 107 93
length(ebac_doms_metab_labs) #107 (means 108 LT patients)## [1] 107
# Begin model training
set.seed(1234)
ebac_doms_train <- sample(1:nrow(ebac_doms_metab_mat), as.integer(0.7*nrow(ebac_doms_metab_mat))) # randomly select 70% samples in training
ebac_doms_test <- setdiff(1:nrow(ebac_doms_metab_mat), ebac_doms_train) # rest is part of the test set
# store matrices into training and test set:
ebac_doms_metab_mat.train <- ebac_doms_metab_mat[ebac_doms_train, ]
ebac_doms_metab_mat.test <- ebac_doms_metab_mat[ebac_doms_test,]
ebac_doms_metab_labs.train <- ebac_doms_metab_labs[ebac_doms_train]
ebac_doms_metab_labs.test <- ebac_doms_metab_labs[ebac_doms_test]
# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
ebac_doms_train_splsda <- mixOmics::splsda(ebac_doms_metab_mat.train, ebac_doms_metab_labs.train, ncomp = 5)
# Performance assessment
## 5-fold, 50-repeat cross validation
set.seed(1234)
ebac_doms_train_plsda_perf <-
perf(
ebac_doms_train_splsda,
validation = "Mfold",
folds = 5,
progressBar = FALSE,
auc = TRUE,
nrepeat = 50
)
plot(
ebac_doms_train_plsda_perf,
col = color.mixo(5:7),
sd = FALSE,
auc = TRUE,
legend.position = "horizontal"
) # ncomp = 2 or 4 is best for classification error rate and max.dist# Number of optimal variables to select for each component
ebac_doms_train_keepX <- c(1:10, seq(20, 108, 10))
set.seed(123)
ebac_doms_train_tune_splsda <-
mixOmics::tune.splsda(
ebac_doms_metab_mat.train,
ebac_doms_metab_labs.train,
ncomp = 4, # Choose 4 components (max) to be safe
validation = 'Mfold',
folds = 5,
dist = 'max.dist',
progressBar = FALSE,
auc = TRUE,
measure = "BER",
test.keepX = ebac_doms_train_keepX,
nrepeat = 50
)
plot(ebac_doms_train_tune_splsda, col = color.jet(4))ebac_doms_train_error <- ebac_doms_train_tune_splsda$error.rate
ebac_doms_train_ncomp <- ebac_doms_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
ebac_doms_train_ncomp #1 components are optimal## [1] 1
ebac_doms_train_select_keepX <- ebac_doms_train_tune_splsda$choice.keepX[1:ifelse(ebac_doms_train_ncomp == 1, ebac_doms_train_ncomp + 1, ebac_doms_train_ncomp)] # optimal number of variables to select per component
ebac_doms_train_select_keepX## comp1 comp2
## 1 10
# Final Model
ebac_doms_train_splsda_final <-
mixOmics::splsda(ebac_doms_metab_mat.train, ebac_doms_metab_labs.train, ncomp = ifelse(ebac_doms_train_ncomp == 1, ebac_doms_train_ncomp + 1, ebac_doms_train_ncomp), keepX = ebac_doms_train_select_keepX)
# Test the model
ebac_doms_predict_train_splsda_final <- predict(ebac_doms_train_splsda_final, ebac_doms_metab_mat.test,
dist = "max.dist")
ebac_doms_predict_train_comp2 <- ebac_doms_predict_train_splsda_final$class$max.dist[,ifelse(ebac_doms_train_ncomp == 1, ebac_doms_train_ncomp + 1, ebac_doms_train_ncomp)]
ebac_doms_union <- union(ebac_doms_predict_train_comp2, ebac_doms_metab_labs.test)
confusionMatrix(table(factor(ebac_doms_predict_train_comp2, ebac_doms_union),
factor(ebac_doms_metab_labs.test, ebac_doms_union)),
negative = "0")## Confusion Matrix and Statistics
##
##
## Expansion No Expansion
## Expansion 1 1
## No Expansion 6 25
##
## Accuracy : 0.7879
## 95% CI : (0.6109, 0.9102)
## No Information Rate : 0.7879
## P-Value [Acc > NIR] : 0.5994
##
## Kappa : 0.1413
##
## Mcnemar's Test P-Value : 0.1306
##
## Sensitivity : 0.14286
## Specificity : 0.96154
## Pos Pred Value : 0.50000
## Neg Pred Value : 0.80645
## Prevalence : 0.21212
## Detection Rate : 0.03030
## Detection Prevalence : 0.06061
## Balanced Accuracy : 0.55220
##
## 'Positive' Class : Expansion
##
ebac_doms_train_background <- background.predict(ebac_doms_train_splsda_final,
comp.predicted = 2,
xlim = c(-20,20),
ylim = c(-20,20),
dist = "centroids.dist")
# Model metrics for all samples
ebac_doms_tot <- predict(ebac_doms_train_splsda_final,
ebac_doms_metab_mat,
dist = "max.dist")
ebac_doms_tot_predict <- ebac_doms_tot$class$max.dist[,ifelse(ebac_doms_train_ncomp == 1, ebac_doms_train_ncomp + 1, ebac_doms_train_ncomp)]
ebac_doms_tot_union <- union(ebac_doms_tot_predict, ebac_doms_metab_labs)
ebac_doms_cm <- confusionMatrix(table(factor(ebac_doms_tot_predict, ebac_doms_tot_union,
levels = c("No Expansion","Expansion")),
factor(ebac_doms_metab_labs, ebac_doms_tot_union,
levels = c("No Expansion","Expansion"))),
positive = "Expansion")
ebac_doms_cm## Confusion Matrix and Statistics
##
##
## Expansion No Expansion
## Expansion 78 20
## No Expansion 1 8
##
## Accuracy : 0.8037
## 95% CI : (0.7158, 0.8742)
## No Information Rate : 0.7383
## P-Value [Acc > NIR] : 0.07337
##
## Kappa : 0.3496
##
## Mcnemar's Test P-Value : 8.568e-05
##
## Sensitivity : 0.9873
## Specificity : 0.2857
## Pos Pred Value : 0.7959
## Neg Pred Value : 0.8889
## Prevalence : 0.7383
## Detection Rate : 0.7290
## Detection Prevalence : 0.9159
## Balanced Accuracy : 0.6365
##
## 'Positive' Class : Expansion
##
# Additional model measures
ebac_doms_epi <- epiR::epi.tests(table(ebac_doms_tot_predict, ebac_doms_metab_labs), conf.level = 0.95)
ebac_doms_confusion_df <- ebac_doms_epi$tab %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = "actual") %>%
mutate(actual = case_when(grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nExpansion",
grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Expansion",
TRUE ~ "Total")) %>%
dplyr::rename("Predicted\nExpansion" = "Test +",
"Predicted\nNo Expansion" = "Test -") %>%
column_to_rownames(var = "actual")
{
pdf(file = "./Results/Figure_6C.pdf", height = 10, width = 10)
plotIndiv(
ebac_doms_train_splsda_final,
comp = c(1,2),
ylim = c(-5,16),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = ebac_doms_train_background,
col = c("#FF0000", "black"),
star = TRUE,
point.lwd = 0.5,
title = NULL,
size.title = 0.00001,
style = "graphics",
legend.title = "Expansion",
X.label = paste0("Component 1 (", round(ebac_doms_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(ebac_doms_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-3,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-10,
ebac_doms_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = 12,
x = 1.25,
substitute(bold("Expansion")), cex = 1.75, adj = 0, col = "#FF0000")
text(
y = -4,
x = 0.5,
substitute(bold("No Expansion")), cex = 1.75, adj = 0, col = "black")
text(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-2.25,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-12,
paste0("ACC = ",
round(ebac_doms_cm$overall[1]*100, 1), "% [",
round(ebac_doms_cm$overall[3]*100, 1), "%, ",
round(ebac_doms_cm$overall[4]*100, 1), "%]"),
cex = 0.75, adj = 0)
text(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-2.05,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-12,
paste0("Sens. = ",
paste0(formatC(round(ebac_doms_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(ebac_doms_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(ebac_doms_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-1.85,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-12,
paste0("Spec. = ",
paste0(formatC(round(ebac_doms_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(ebac_doms_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(ebac_doms_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-1.65,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-12,
paste0("OR = ",
formatC(round(ebac_doms_epi$detail[2][6,], 3), digits = 1, format = "f"), " [",
formatC(round(ebac_doms_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
formatC(round(ebac_doms_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
cex = 0.75, adj = 0)
invisible(dev.off())
}
plotIndiv(
ebac_doms_train_splsda_final,
comp = c(1,2),
ylim = c(-5,16),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = ebac_doms_train_background,
col = c("#FF0000", "black"),
star = TRUE,
point.lwd = 0.5,
title = NULL,
size.title = 0.00001,
style = "graphics",
legend.title = "Expansion",
X.label = paste0("Component 1 (", round(ebac_doms_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(ebac_doms_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-3,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-10,
ebac_doms_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = 12,
x = 1.25,
substitute(bold("Expansion")), cex = 1.75, adj = 0, col = "#FF0000")
text(
y = -4,
x = 0.5,
substitute(bold("No Expansion")), cex = 1.75, adj = 0, col = "black")
text(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-2.25,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-12,
paste0("ACC = ",
round(ebac_doms_cm$overall[1]*100, 1), "% [",
round(ebac_doms_cm$overall[3]*100, 1), "%, ",
round(ebac_doms_cm$overall[4]*100, 1), "%]"),
cex = 0.75, adj = 0)
text(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-2.05,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-12,
paste0("Sens. = ",
paste0(formatC(round(ebac_doms_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(ebac_doms_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(ebac_doms_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-1.85,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-12,
paste0("Spec. = ",
paste0(formatC(round(ebac_doms_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(ebac_doms_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(ebac_doms_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = min(ebac_doms_train_splsda_final$variates$X[,2])*-1.65,
x = min(ebac_doms_train_splsda_final$variates$X[,1])*-12,
paste0("OR = ",
formatC(round(ebac_doms_epi$detail[2][6,], 3), digits = 1, format = "f"), " [",
formatC(round(ebac_doms_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
formatC(round(ebac_doms_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
cex = 0.75, adj = 0)infx_metab_mat <-
metab_qual_anon %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound)) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
ungroup() %>%
right_join(peri_matrix_all %>% select(sampleID, bact_infection_present)) %>%
group_by(sampleID, compound, bact_infection_present) %>%
summarise(mvalue = mean(mvalue, na.rm = TRUE)) %>%
ungroup() %>%
mutate_all(~replace(., is.nan(.), NA)) %>%
select(sampleID, compound, mvalue, bact_infection_present) %>%
drop_na(sampleID) %>%
pivot_wider(names_from = "compound", values_from = "mvalue") %>%
filter(sampleID != "") %>%
mutate(bact_infection_present = ifelse(grepl(x = bact_infection_present, pattern = "No"), "No Infection", "Infection")) %>%
select(-bact_infection_present) %>%
column_to_rownames(var = "sampleID") %>%
select(-`NA`) %>%
filter_all(any_vars(!is.na(.)))
infx_metab_labs <-
infx_metab_mat %>%
rownames_to_column(var = "sampleID") %>%
left_join(peri_matrix_all %>% select(sampleID, bact_infection_present)) %>%
mutate(bact_infection_present = ifelse(grepl(x = bact_infection_present, pattern = "No"), "No Infection", "Infection")) %>%
pull(bact_infection_present)
dim(infx_metab_mat) #107 93 (means 93 compounds and 108 LT patients/infections)## [1] 107 93
length(infx_metab_labs) #107 (means 108 LT patients/infections)## [1] 107
# Begin model training
set.seed(1234)
infx_train <- sample(1:nrow(infx_metab_mat), as.integer(0.7*nrow(infx_metab_mat))) # randomly select 70% samples in training
infx_test <- setdiff(1:nrow(infx_metab_mat), infx_train) # rest is part of the test set
# store matrices into training and test set:
infx_metab_mat.train <- infx_metab_mat[infx_train, ]
infx_metab_mat.test <- infx_metab_mat[infx_test,]
infx_metab_labs.train <- infx_metab_labs[infx_train]
infx_metab_labs.test <- infx_metab_labs[infx_test]
# Train the model to tune hyperparameters
# Initial model to find optimal number of components to include
set.seed(1234)
infx_train_splsda <- mixOmics::splsda(infx_metab_mat.train, infx_metab_labs.train, ncomp = 5)
# Performance assessment
## 5-fold, 100-repeat cross validation
set.seed(1234)
infx_train_plsda_perf <-
perf(
infx_train_splsda,
validation = "Mfold",
folds = 5,
progressBar = FALSE,
auc = TRUE,
nrepeat = 50
)
plot(
infx_train_plsda_perf,
col = color.mixo(5:7),
sd = FALSE,
auc = TRUE,
legend.position = "horizontal"
) # ncomp = 1 is best for classification error rate and max.dist# Number of optimal variables to select for each component
infx_train_keepX <- c(1:10, seq(20, 108, 10))
set.seed(123)
infx_train_tune_splsda <-
mixOmics::tune.splsda(
infx_metab_mat.train,
infx_metab_labs.train,
ncomp = 3, # Choose 3 components (max) to be safe
validation = 'Mfold',
folds = 5,
dist = 'max.dist',
progressBar = FALSE,
auc = TRUE,
measure = "BER",
test.keepX = infx_train_keepX,
nrepeat = 50
)
plot(infx_train_tune_splsda, col = color.jet(3))infx_train_error <- infx_train_tune_splsda$error.rate
infx_train_ncomp <- infx_train_tune_splsda$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
infx_train_ncomp #1 component is optimal## [1] 1
infx_train_select_keepX <- infx_train_tune_splsda$choice.keepX[1:(infx_train_ncomp + 1)] # optimal number of variables to select per component
infx_train_select_keepX## comp1 comp2
## 90 3
# Final Model
infx_train_splsda_final <-
mixOmics::splsda(infx_metab_mat.train, infx_metab_labs.train, ncomp = (infx_train_ncomp + 1), keepX = infx_train_select_keepX)
# Test the model
infx_predict_train_splsda_final <- predict(infx_train_splsda_final, infx_metab_mat.test,
dist = "max.dist")
infx_predict_train_comp2 <- infx_predict_train_splsda_final$class$max.dist[,(infx_train_ncomp + 1)]
infx_union <- union(infx_predict_train_comp2, infx_metab_labs.test)
confusionMatrix(table(factor(infx_predict_train_comp2, infx_union),
factor(infx_metab_labs.test, infx_union)),
positive = "Infection")## Confusion Matrix and Statistics
##
##
## No Infection Infection
## No Infection 20 6
## Infection 6 1
##
## Accuracy : 0.6364
## 95% CI : (0.4512, 0.796)
## No Information Rate : 0.7879
## P-Value [Acc > NIR] : 0.9865
##
## Kappa : -0.0879
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.1429
## Specificity : 0.7692
## Pos Pred Value : 0.1429
## Neg Pred Value : 0.7692
## Prevalence : 0.2121
## Detection Rate : 0.0303
## Detection Prevalence : 0.2121
## Balanced Accuracy : 0.4560
##
## 'Positive' Class : Infection
##
infx_train_background <- background.predict(infx_train_splsda_final,
comp.predicted = 2,
xlim = c(-20,20),
ylim = c(-20,20),
dist = "centroids.dist")
# Model metrics for all samples
infx_tot <- predict(infx_train_splsda_final,
infx_metab_mat,
dist = "max.dist")
infx_tot_predict <- infx_tot$class$max.dist[,(infx_train_ncomp + 1)]
infx_tot_union <- union(infx_tot_predict, infx_metab_labs)
infx_cm <- confusionMatrix(table(factor(infx_tot_predict, infx_tot_union,
levels = c("Infection", "No Infection")),
factor(infx_metab_labs, infx_tot_union,
levels = c("Infection", "No Infection"))),
positive = "Infection")
infx_cm## Confusion Matrix and Statistics
##
##
## No Infection Infection
## No Infection 12 7
## Infection 15 73
##
## Accuracy : 0.7944
## 95% CI : (0.7054, 0.8664)
## No Information Rate : 0.7477
## P-Value [Acc > NIR] : 0.1582
##
## Kappa : 0.3958
##
## Mcnemar's Test P-Value : 0.1356
##
## Sensitivity : 0.9125
## Specificity : 0.4444
## Pos Pred Value : 0.8295
## Neg Pred Value : 0.6316
## Prevalence : 0.7477
## Detection Rate : 0.6822
## Detection Prevalence : 0.8224
## Balanced Accuracy : 0.6785
##
## 'Positive' Class : Infection
##
# Additional model measures
infx_epi <- epiR::epi.tests(table(infx_tot_predict, infx_metab_labs), conf.level = 0.95)
infx_confusion_df <- infx_epi$tab %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = "actual") %>%
mutate(actual = case_when(grepl(actual, pattern = "+", fixed = TRUE) ~ "Actual\nInfection",
grepl(actual, pattern = "-", fixed = TRUE) ~ "Actual\nNo Infection",
TRUE ~ "Total")) %>%
dplyr::rename("Predicted\nInfection" = "Test +",
"Predicted\nNo Infection" = "Test -") %>%
column_to_rownames(var = "actual")
{
pdf(file = "./Results/Figure_7A.pdf", height = 10, width = 10)
plotIndiv(
infx_train_splsda_final,
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = infx_train_background,
col = c("goldenrod", "gray75"),
star = TRUE,
point.lwd = 0.5,
title = NULL,
size.title = 0.00001,
style = "graphics",
legend.title = "Infection Group",
X.label = paste0("Component 1 (", round(infx_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(infx_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = max(infx_train_splsda_final$variates$X[,2])*0.65,
x = max(infx_train_splsda_final$variates$X[,1])*0.4,
infx_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = 0,
x = 6.5,
substitute(bold("Infection")), cex = 1.75, adj = 0, col = "goldenrod")
text(
y = 4,
x = -4.5,
substitute(bold("No Infection")), cex = 1.75, adj = 0, col = "gray70")
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.6,
x = max(infx_train_splsda_final$variates$X[,1])*0.5,
paste0("ACC = ",
round(infx_cm$overall[1]*100, 1), "% [",
round(infx_cm$overall[3]*100, 1), "%, ",
round(infx_cm$overall[4]*100, 1), "%]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.55,
x = max(infx_train_splsda_final$variates$X[,1])*0.5,
paste0("Sens. = ",
paste0(formatC(round(infx_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(infx_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(infx_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.5,
x = max(infx_train_splsda_final$variates$X[,1])*0.5,
paste0("Spec. = ",
paste0(formatC(round(infx_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(infx_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(infx_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.45,
x = max(infx_train_splsda_final$variates$X[,1])*0.5,
paste0("OR = ",
formatC(round(infx_epi$detail[2][6,], 3), digits = 1, format = "f"), " [",
formatC(round(infx_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
formatC(round(infx_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
cex = 0.75, adj = 0)
invisible(dev.off())
}
plotIndiv(
infx_train_splsda_final,
comp = c(1,2),
pch = 1,
ind.names = FALSE,
legend = FALSE,
background = infx_train_background,
col = c("goldenrod", "gray75"),
star = TRUE,
point.lwd = 0.5,
title = NULL,
size.title = 0.00001,
style = "graphics",
legend.title = "Infection Group",
X.label = paste0("Component 1 (", round(infx_train_splsda_final$prop_expl_var$X[1] * 100), "%)"),
Y.label = paste0("Component 2 (", round(infx_train_splsda_final$prop_expl_var$X[2] * 100), "%)")
)
addtable2plot(
y = max(infx_train_splsda_final$variates$X[,2])*0.65,
x = max(infx_train_splsda_final$variates$X[,1])*0.4,
infx_confusion_df,
bty = "o",
display.rownames = TRUE,
hlines = TRUE,
vlines = TRUE,
cex = 0.75,
bg = "white"
)
text(
y = 0,
x = 6.5,
substitute(bold("Infection")), cex = 1.75, adj = 0, col = "goldenrod")
text(
y = 4,
x = -4.5,
substitute(bold("No Infection")), cex = 1.75, adj = 0, col = "gray70")
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.6,
x = max(infx_train_splsda_final$variates$X[,1])*0.5,
paste0("ACC = ",
round(infx_cm$overall[1]*100, 1), "% [",
round(infx_cm$overall[3]*100, 1), "%, ",
round(infx_cm$overall[4]*100, 1), "%]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.55,
x = max(infx_train_splsda_final$variates$X[,1])*0.5,
paste0("Sens. = ",
paste0(formatC(round(infx_epi$detail[2][3,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(infx_epi$detail[3][3,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(infx_epi$detail[4][3,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.5,
x = max(infx_train_splsda_final$variates$X[,1])*0.5,
paste0("Spec. = ",
paste0(formatC(round(infx_epi$detail[2][4,], 3)*100, digits = 1, format = "f"), "%"), " [",
paste0(formatC(round(infx_epi$detail[3][4,], 3)*100, digits = 1, format = "f"), "%"),", ",
paste0(formatC(round(infx_epi$detail[4][4,], 3)*100, digits = 1, format = "f"), "%"),"]"),
cex = 0.75, adj = 0)
text(
y = max(infx_train_splsda_final$variates$X[,2])*0.45,
x = max(infx_train_splsda_final$variates$X[,1])*0.5,
paste0("OR = ",
formatC(round(infx_epi$detail[2][6,], 3), digits = 1, format = "f"), " [",
formatC(round(infx_epi$detail[3][6,], 3), digits = 1, format = "f"), ", ",
formatC(round(infx_epi$detail[4][6,], 3), digits = 1, format = "f"), "]"),
cex = 0.75, adj = 0)axis.title.cex = 3.275
axis.text.cex = 2.5
plot.text.cex = 2.5
plot.group.cex = 2.5
point.cex = 3
{
pdf(file = "./Results/Figure_6AC.pdf", height = 17, width = 11)
par(mfrow = c(2, 1), mar = c(5, 5, 1, 1))
par(mar = c(5, 5, 2, 1))
# Figure 6A
plotIndiv(ecoc_doms_train_splsda_final, comp = c(1, 2), pch = 1,
ind.names = FALSE, legend = FALSE, background = ecoc_doms_train_background,
col = c("#0C7A3A", "black"), star = TRUE, point.lwd = 0.5,
title = NULL, size.title = 1e-05, style = "graphics",
legend.title = "Expansion", X.label = paste0("Component 1 (",
round(ecoc_doms_train_splsda_final$prop_expl_var$X[1] *
100), "%)"), Y.label = paste0("Component 2 (",
round(ecoc_doms_train_splsda_final$prop_expl_var$X[2] *
100), "%)"))
addtable2plot(y = min(ecoc_doms_train_splsda_final$variates$X[,
2]) * 1.05, x = min(ecoc_doms_train_splsda_final$variates$X[,
1]) * 1.15, ecoc_doms_confusion_df, bty = "o", display.rownames = TRUE,
hlines = TRUE, vlines = TRUE, cex = 0.75, bg = "white")
text(y = -3.5, x = 1.5, substitute(bold("Expansion")), cex = 1.75,
adj = 0, col = "#0C7A3A")
text(y = 3, x = 0.75, substitute(bold("No Expansion")), cex = 1.75,
adj = 0, col = "black")
text(y = min(ecoc_doms_train_splsda_final$variates$X[, 2]) *
0.825, x = max(ecoc_doms_train_splsda_final$variates$X[,
1]) * 0.25, paste0("ACC = ", round(ecoc_doms_cm$overall[1] *
100, 1), "% [", round(ecoc_doms_cm$overall[3] * 100,
1), "%, ", round(ecoc_doms_cm$overall[4] * 100, 1), "%]"),
cex = 0.75, adj = 0)
text(y = min(ecoc_doms_train_splsda_final$variates$X[, 2]) *
0.875, x = max(ecoc_doms_train_splsda_final$variates$X[,
1]) * 0.25, paste0("Sens. = ", paste0(formatC(round(ecoc_doms_epi$detail[2][3,
], 3) * 100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(ecoc_doms_epi$detail[3][3,
], 3) * 100, digits = 1, format = "f"), "%"), ", ", paste0(formatC(round(ecoc_doms_epi$detail[4][3,
], 3) * 100, digits = 1, format = "f"), "%"), "]"), cex = 0.75,
adj = 0)
text(y = min(ecoc_doms_train_splsda_final$variates$X[, 2]) *
0.925, x = max(ecoc_doms_train_splsda_final$variates$X[,
1]) * 0.25, paste0("Spec. = ", paste0(formatC(round(ecoc_doms_epi$detail[2][4,
], 3) * 100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(ecoc_doms_epi$detail[3][4,
], 3) * 100, digits = 1, format = "f"), "%"), ", ", paste0(formatC(round(ecoc_doms_epi$detail[4][4,
], 3) * 100, digits = 1, format = "f"), "%"), "]"), cex = 0.75,
adj = 0)
text(y = min(ecoc_doms_train_splsda_final$variates$X[, 2]) *
0.975, x = max(ecoc_doms_train_splsda_final$variates$X[,
1]) * 0.25, paste0("OR = ", formatC(round(ecoc_doms_epi$detail[2][6,
], 3), digits = 1, format = "f"), " [", formatC(round(ecoc_doms_epi$detail[3][6,
], 3), digits = 1, format = "f"), ", ", formatC(round(ecoc_doms_epi$detail[4][6,
], 3), digits = 1, format = "f"), "]"), cex = 0.75, adj = 0)
# Figure 6C
par(mar = c(5, 5, 2, 1))
plotIndiv(ebac_doms_train_splsda_final, comp = c(1, 2), ylim = c(-5,
16), pch = 1, ind.names = FALSE, legend = FALSE, background = ebac_doms_train_background,
col = c("#FF0000", "black"), star = TRUE, point.lwd = 0.5,
title = NULL, size.title = 1e-05, style = "graphics",
legend.title = "Expansion", X.label = paste0("Component 1 (",
round(ebac_doms_train_splsda_final$prop_expl_var$X[1] *
100), "%)"), Y.label = paste0("Component 2 (",
round(ebac_doms_train_splsda_final$prop_expl_var$X[2] *
100), "%)"))
addtable2plot(y = min(ebac_doms_train_splsda_final$variates$X[,
2]) * -3, x = min(ebac_doms_train_splsda_final$variates$X[,
1]) * -10, ebac_doms_confusion_df, bty = "o", display.rownames = TRUE,
hlines = TRUE, vlines = TRUE, cex = 0.75, bg = "white")
text(y = 12, x = 1.25, substitute(bold("Expansion")), cex = 1.75,
adj = 0, col = "#FF0000")
text(y = -4, x = 0.5, substitute(bold("No Expansion")), cex = 1.75,
adj = 0, col = "black")
text(y = min(ebac_doms_train_splsda_final$variates$X[, 2]) *
-2.25, x = min(ebac_doms_train_splsda_final$variates$X[,
1]) * -12, paste0("ACC = ", round(ebac_doms_cm$overall[1] *
100, 1), "% [", round(ebac_doms_cm$overall[3] * 100,
1), "%, ", round(ebac_doms_cm$overall[4] * 100, 1), "%]"),
cex = 0.75, adj = 0)
text(y = min(ebac_doms_train_splsda_final$variates$X[, 2]) *
-2.05, x = min(ebac_doms_train_splsda_final$variates$X[,
1]) * -12, paste0("Sens. = ", paste0(formatC(round(ebac_doms_epi$detail[2][3,
], 3) * 100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(ebac_doms_epi$detail[3][3,
], 3) * 100, digits = 1, format = "f"), "%"), ", ", paste0(formatC(round(ebac_doms_epi$detail[4][3,
], 3) * 100, digits = 1, format = "f"), "%"), "]"), cex = 0.75,
adj = 0)
text(y = min(ebac_doms_train_splsda_final$variates$X[, 2]) *
-1.85, x = min(ebac_doms_train_splsda_final$variates$X[,
1]) * -12, paste0("Spec. = ", paste0(formatC(round(ebac_doms_epi$detail[2][4,
], 3) * 100, digits = 1, format = "f"), "%"), " [", paste0(formatC(round(ebac_doms_epi$detail[3][4,
], 3) * 100, digits = 1, format = "f"), "%"), ", ", paste0(formatC(round(ebac_doms_epi$detail[4][4,
], 3) * 100, digits = 1, format = "f"), "%"), "]"), cex = 0.75,
adj = 0)
text(y = min(ebac_doms_train_splsda_final$variates$X[, 2]) *
-1.65, x = min(ebac_doms_train_splsda_final$variates$X[,
1]) * -12, paste0("OR = ", formatC(round(ebac_doms_epi$detail[2][6,
], 3), digits = 1, format = "f"), " [", formatC(round(ebac_doms_epi$detail[3][6,
], 3), digits = 1, format = "f"), ", ", formatC(round(ebac_doms_epi$detail[4][6,
], 3), digits = 1, format = "f"), "]"), cex = 0.75, adj = 0)
invisible(dev.off())
}# Take absolute value to make diversity group loadings plot narrower
diversity_train_splsda_final$loadings$X <- abs(diversity_train_splsda_final$loadings$X)
# Combine Supplemental Figures 3A + 3B
{pdf(file = "./Results/Supplemental_Figure_3.pdf",
height = 8,
width = 11)
par(mfrow = c(1,3))
# Supplmental Figure 3A
plotLoadings(
diversity_train_splsda_final,
contrib = 'max',
method = 'mean',
comp = 1,
legend = FALSE,
legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
size.name = 1.1,
size.title = rel(1),
ndisplay = 50
)
# Supplmental Figure 3B
plotLoadings(
diversity_train_splsda_final,
contrib = 'max',
method = 'mean',
comp = 2,
legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
size.name = 1.1,
size.title = rel(1),
ndisplay = 50
)
invisible(dev.off())
}
par(mfrow = c(1,3))
# Supplmental Figure 3A
plotLoadings(
diversity_train_splsda_final,
contrib = 'max',
method = 'mean',
comp = 1,
legend = FALSE,
legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
size.name = 0.6,
size.title = rel(1),
ndisplay = 50
)
# Supplmental Figure 3B
plotLoadings(
diversity_train_splsda_final,
contrib = 'max',
method = 'mean',
comp = 2,
legend.col = c("#EDE342", "#F69A97", "#FF51EB"),
size.name = 0.6,
size.title = rel(1),
ndisplay = 50
)#### Metaphlan + Qual Metabolites ####
heatmap_data_raw <- t_metaphlan %>%
drop_na(taxid) %>%
select(sampleID) %>%
group_by(sampleID) %>%
dplyr::slice(1) %>%
mutate(db = ifelse(grepl(sampleID, pattern = "lt"), "Liver Transplant", "Healthy Donor")) %>%
left_join(metab_qual_anon) %>%
mutate(compound = ifelse(compound == "isovaleric-acid", "isovalerate", compound),
compound = str_to_title(compound),
compound = recode(compound,
Preq1 = "PreQ1")) %>%
filter(compound %in% heatmap_cmpds$compound|is.na(compound)) %>%
group_by(sampleID, compound) %>%
slice_max(mvalue, with_ties = F, n = 1) %>%
ungroup() %>%
select(-db) %>%
left_join(metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, db)) %>%
distinct(sampleID, db), by = "sampleID") %>%
group_by(sampleID, compound) %>%
slice_max(mvalue, with_ties = F, n = 1) %>%
left_join(alpha_shannon) %>%
group_by(db) %>%
arrange(db, Shannon) %>%
ungroup()
diversity_train_splsda_final_comp2 <- mixOmics::selectVar(diversity_train_splsda_final, comp = 2)$name
gg_diversity_comp2 <- metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>% select(sampleID, diversity_group)) %>%
select(sampleID, diversity_group) %>%
distinct(sampleID, diversity_group) %>%
left_join(heatmap_data_raw) %>%
filter(compound %in% diversity_train_splsda_final_comp2 | is.na(compound)) %>%
mutate(diversity_group = factor(diversity_group, levels = c("Low Diversity", "Medium Diversity",
"High Diversity", "Healthy Donor"))) %>%
pivot_wider(id_cols = c(sampleID, diversity_group, patientID, db, Shannon), names_from = "compound", values_from = "mvalue", values_fill = NA) %>% # For missing values, just for plotting
pivot_longer(-c(sampleID, diversity_group, patientID, db, Shannon), names_to = "compound", values_to = "mvalue") %>%
mutate(compound = factor(compound, levels = c(diversity_train_splsda_final_comp2))) %>%
# filter(compound == "Omega-Muricholic Acid") %>% arrange(desc(mvalue))
filter(compound != "NA") %>%
group_by(sampleID, compound) %>%
dplyr::slice(1) %>%
ggplot(aes(x = reorder(sampleID, Shannon), y = mvalue, fill = diversity_group)) +
geom_col() +
scale_fill_manual(values = diversity_group_colors) +
theme_bw() +
theme(legend.position = "none",
axis.text.x=eb(),
axis.ticks.x=eb(),
# strip.text.x= et(angle = 0, size = 14, color = "black"),
strip.text.x = eb(),
strip.background = eb(),
strip.text.y = et(angle = 0, size = 14, color = "black", hjust = 0),
axis.title.y = et(color = "black", size = 14),
axis.text.y = et(color = "black", size = 12),
panel.spacing = unit(0.5, "lines"),
plot.margin = margin(t = 5,
r = 5,
b = 0,
l = 5),
panel.grid.minor = eb()) +
facet_grid(compound~diversity_group, scales = "free")+
scale_y_continuous(expand = expansion(mult = c(0.005,0.005))) +
ylab("Normalized Peak Area") +
xlab("")
pdf(file = "./Results/Metahphlan_Medium_Diversity_Compounds.pdf", height = 16, width = 20, onefile = FALSE)
gg.stack(gg_metaphlan,
gg_diversity_comp2,
heights = c(1, 4))
dev.off()## quartz_off_screen
## 2
# Take absolute value to make diversity group loadings plot
# narrower
ecoc_doms_train_splsda_final$loadings$X <- abs(ecoc_doms_train_splsda_final$loadings$X)
# Supplemental Figure 5AB
{
pdf(file = "./Results/Supplemental_Figure_6AB.pdf", height = 8,
width = 11)
par(mfrow = c(1, 3))
plotLoadings(ecoc_doms_train_splsda_final, contrib = "max",
method = "mean", comp = 1, legend = FALSE, legend.col = c("#0C7A3A",
"black"), size.name = 1.1, size.title = rel(1), ndisplay = 50)
plotLoadings(ecoc_doms_train_splsda_final, contrib = "max",
method = "mean", comp = 2, legend.col = c("#0C7A3A",
"black"), size.name = 1.1, size.title = rel(1), ndisplay = 50)
invisible(dev.off())
}
par(mfrow = c(1, 3))
plotLoadings(ecoc_doms_train_splsda_final, contrib = "max", method = "mean",
comp = 1, legend = FALSE, legend.col = c("#0C7A3A", "black"),
size.name = 0.6, size.title = rel(1), ndisplay = 50)
plotLoadings(ecoc_doms_train_splsda_final, contrib = "max", method = "mean",
comp = 2, legend.col = c("#0C7A3A", "black"), size.name = 0.6,
size.title = rel(1), ndisplay = 50)# Take absolute value to make diversity group loadings plot
# narrower
ebac_doms_train_splsda_final$loadings$X <- abs(ebac_doms_train_splsda_final$loadings$X)
# Supplemental Figure 5CD
{
pdf(file = "./Results/Supplemental_Figure_6CD.pdf", height = 8,
width = 11)
par(mfrow = c(1, 3))
plotLoadings(ebac_doms_train_splsda_final, contrib = "max",
method = "mean", comp = 1, legend = FALSE, legend.col = c("#FF0000",
"black"), size.name = 1.1, size.title = rel(1), ndisplay = 50)
plotLoadings(ebac_doms_train_splsda_final, contrib = "max",
method = "mean", comp = 2, legend.col = c("#FF0000",
"black"), size.name = 1.1, size.title = rel(1), ndisplay = 50)
invisible(dev.off())
}
par(mfrow = c(1, 3))
plotLoadings(ebac_doms_train_splsda_final, contrib = "max", method = "mean",
comp = 1, legend = FALSE, legend.col = c("#FF0000", "black"),
size.name = 0.6, size.title = rel(1), ndisplay = 50)
plotLoadings(ebac_doms_train_splsda_final, contrib = "max", method = "mean",
comp = 2, legend.col = c("#FF0000", "black"), size.name = 0.6,
size.title = rel(1), ndisplay = 50)# Take absolute value to make diversity group loadings plot
# narrower
infx_train_splsda_final$loadings$X <- abs(infx_train_splsda_final$loadings$X)
{
pdf(file = "./Results/Supplemental_Figure_7.pdf", height = 8,
width = 11)
par(mfrow = c(1, 3))
# Supplmental Figure 7A
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
comp = 1, legend = FALSE, legend.col = c("goldenrod",
"gray75"), size.name = 1.1, size.title = rel(1),
ndisplay = 50)
# Supplmental Figure 7B
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
comp = 2, legend.col = c("goldenrod", "gray75"), size.name = 1.1,
size.title = rel(1), ndisplay = 50)
invisible(dev.off())
}
par(mfrow = c(1, 3))
# Supplmental Figure 7A
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
comp = 1, legend = FALSE, legend.col = c("goldenrod", "gray75"),
size.name = 0.6, size.title = rel(1), ndisplay = 50)
# Supplmental Figure 7B
plotLoadings(infx_train_splsda_final, contrib = "max", method = "mean",
comp = 2, legend.col = c("goldenrod", "gray75"), size.name = 0.6,
size.title = rel(1), ndisplay = 50)flow_chart <- flow_exclusions(incl_counts = c(158, 130, 107,
25), total_label = "Total Patients Enrolled", incl_labels = c("Patients w/ Transplant",
"Patients Included", "Patients w/ Bacterial Infection"),
excl_labels = c("No Transplant", "No Sample In Study Period\nDay -7 to +30",
"Patients w/o Bacterial Infection"), show_count = TRUE)
flow_chartflow_chart %>%
export_svg() %>%
charToRaw() %>%
rsvg_pdf("./Results/Supplemental_Figure_5.pdf")## Vector of variables to summarize
demo_vars <- c("race", "sex", "age", "meld_transplant", "Alcoholic Hepatitis",
"Alcoholic Cirrhosis", "NAFLD/NASH", "Primary Sclerosing Cholangitis",
"Acute Viral Hepatitis", "Chronic Hepatitis B", "Chronic Hepatitis C",
"Autoimmune", "Wilson's Disease", "Alpha-1 Antitrypsin",
"Hemachromatosis", "Drug Induced Liver Injury or Toxin",
"Budd Chiari", "Cryptogenic", "Malignancy", "Other", "Dialysis",
"Pressers", "Mechanical Ventilation")
## Vector of categorical variables that need transformation
demo_cats <- c("race", "sex", "Alcoholic Hepatitis", "Alcoholic Cirrhosis",
"NAFLD/NASH", "Primary Sclerosing Cholangitis", "Acute Viral Hepatitis",
"Chronic Hepatitis B", "Chronic Hepatitis C", "Autoimmune",
"Wilson's Disease", "Alpha-1 Antitrypsin", "Hemachromatosis",
"Drug Induced Liver Injury or Toxin", "Budd Chiari", "Cryptogenic",
"Malignancy", "Other", "Dialysis", "Pressers", "Mechanical Ventilation")
tab1_1 <- CreateTableOne(vars = demo_vars, testNonNormal = "wilcox.test",
includeNA = TRUE, factorVars = demo_cats, strata = "any_infection",
data = demo)
summary(tab1_1) # Age is potentially skewed, need to state that it is skewed and re-run `CreateTableOne`##
## ### Summary of continuous variables ###
##
## any_infection: 0
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 82 0 0 51 17 55 40 63 2 77 -1.00 0.8
## meld_transplant 82 0 0 26 10 28 19 33 6 49 -0.04 -0.7
## ------------------------------------------------------------
## any_infection: 1
## n miss p.miss mean sd median p25 p75 min max skew kurt
## age 25 0 0 56 15 59 46 68 22 73 -0.9 0.002
## meld_transplant 25 0 0 27 10 30 19 33 11 42 -0.3 -1.057
##
## p-values
## pNormal pNonNormal
## age 0.2072738 0.1765252
## meld_transplant 0.6624494 0.6085202
##
## Standardize mean differences
## 1 vs 2
## age 0.2976093
## meld_transplant 0.1025141
##
## =======================================================================================
##
## ### Summary of categorical variables ###
##
## any_infection: 0
## var n miss p.miss
## race 82 0 0.0
##
##
##
##
##
##
##
## sex 82 0 0.0
##
##
## Alcoholic Hepatitis 82 0 0.0
##
##
## Alcoholic Cirrhosis 82 0 0.0
##
##
## NAFLD/NASH 82 0 0.0
##
##
## Primary Sclerosing Cholangitis 82 0 0.0
##
##
## Acute Viral Hepatitis 82 0 0.0
##
##
## Chronic Hepatitis B 82 0 0.0
##
##
## Chronic Hepatitis C 82 0 0.0
##
##
## Autoimmune 82 0 0.0
##
##
## Wilson's Disease 82 0 0.0
##
##
## Alpha-1 Antitrypsin 82 0 0.0
##
## Hemachromatosis 82 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 82 0 0.0
##
##
## Budd Chiari 82 0 0.0
##
## Cryptogenic 82 0 0.0
##
##
## Malignancy 82 0 0.0
##
##
## Other 82 0 0.0
##
##
## Dialysis 82 0 0.0
##
##
## Pressers 82 0 0.0
##
##
## Mechanical Ventilation 82 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 1 1.2 1.2
## Asian/Mideast Indian 6 7.3 8.5
## Black/African-American 9 11.0 19.5
## More than one Race 8 9.8 29.3
## Patient Declined 4 4.9 34.1
## Unknown 0 0.0 34.1
## White 54 65.9 100.0
##
## Female 38 46.3 46.3
## Male 44 53.7 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 42 51.2 51.2
## 1 40 48.8 100.0
##
## 0 72 87.8 87.8
## 1 10 12.2 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 82 100.0 100.0
## 1 0 0.0 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 77 93.9 93.9
## 1 5 6.1 100.0
##
## 0 80 97.6 97.6
## 1 2 2.4 100.0
##
## 0 82 100.0 100.0
##
## 0 82 100.0 100.0
## 1 0 0.0 100.0
##
## 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## 0 82 100.0 100.0
##
## 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## 0 65 79.3 79.3
## 1 17 20.7 100.0
##
## 0 74 90.2 90.2
## 1 8 9.8 100.0
##
## 0 60 73.2 73.2
## 1 22 26.8 100.0
##
## 0 76 92.7 92.7
## 1 6 7.3 100.0
##
## 0 77 93.9 93.9
## 1 5 6.1 100.0
##
## ------------------------------------------------------------
## any_infection: 1
## var n miss p.miss
## race 25 0 0.0
##
##
##
##
##
##
##
## sex 25 0 0.0
##
##
## Alcoholic Hepatitis 25 0 0.0
##
##
## Alcoholic Cirrhosis 25 0 0.0
##
##
## NAFLD/NASH 25 0 0.0
##
##
## Primary Sclerosing Cholangitis 25 0 0.0
##
##
## Acute Viral Hepatitis 25 0 0.0
##
##
## Chronic Hepatitis B 25 0 0.0
##
##
## Chronic Hepatitis C 25 0 0.0
##
##
## Autoimmune 25 0 0.0
##
##
## Wilson's Disease 25 0 0.0
##
##
## Alpha-1 Antitrypsin 25 0 0.0
##
## Hemachromatosis 25 0 0.0
##
##
## Drug Induced Liver Injury or Toxin 25 0 0.0
##
##
## Budd Chiari 25 0 0.0
##
## Cryptogenic 25 0 0.0
##
##
## Malignancy 25 0 0.0
##
##
## Other 25 0 0.0
##
##
## Dialysis 25 0 0.0
##
##
## Pressers 25 0 0.0
##
##
## Mechanical Ventilation 25 0 0.0
##
##
## level freq percent cum.percent
## American Indian or Alaska Native 0 0.0 0.0
## Asian/Mideast Indian 2 8.0 8.0
## Black/African-American 2 8.0 16.0
## More than one Race 2 8.0 24.0
## Patient Declined 1 4.0 28.0
## Unknown 2 8.0 36.0
## White 16 64.0 100.0
##
## Female 9 36.0 36.0
## Male 16 64.0 100.0
##
## 0 23 92.0 92.0
## 1 2 8.0 100.0
##
## 0 17 68.0 68.0
## 1 8 32.0 100.0
##
## 0 19 76.0 76.0
## 1 6 24.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 25 100.0 100.0
## 1 0 0.0 100.0
##
## 0 25 100.0 100.0
##
## 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## 0 19 76.0 76.0
## 1 6 24.0 100.0
##
## 0 20 80.0 80.0
## 1 5 20.0 100.0
##
## 0 16 64.0 64.0
## 1 9 36.0 100.0
##
## 0 20 80.0 80.0
## 1 5 20.0 100.0
##
## 0 23 92.0 92.0
## 1 2 8.0 100.0
##
##
## p-values
## pApprox pExact
## race 0.3074905 0.4275466
## sex 0.4953032 0.4904875
## Alcoholic Hepatitis 1.0000000 1.0000000
## Alcoholic Cirrhosis 0.2123469 0.1713637
## NAFLD/NASH 0.2590605 0.1979409
## Primary Sclerosing Cholangitis 0.3704754 0.3322151
## Acute Viral Hepatitis 0.6007080 0.5709809
## Chronic Hepatitis B 0.5271112 0.2336449
## Chronic Hepatitis C 0.6007080 0.5709809
## Autoimmune 0.4694777 0.5886832
## Wilson's Disease 1.0000000 0.5538202
## Alpha-1 Antitrypsin NA NA
## Hemachromatosis 0.5271112 0.2336449
## Drug Induced Liver Injury or Toxin 1.0000000 1.0000000
## Budd Chiari NA NA
## Cryptogenic 1.0000000 1.0000000
## Malignancy 0.9440592 0.7828238
## Other 0.3063991 0.1774710
## Dialysis 0.5266900 0.4513768
## Pressers 0.1465607 0.1238754
## Mechanical Ventilation 1.0000000 0.6642869
##
## Standardize mean differences
## 1 vs 2
## race 0.45891730
## sex 0.21130091
## Alcoholic Hepatitis 0.02568258
## Alcoholic Cirrhosis 0.34709743
## NAFLD/NASH 0.31029007
## Primary Sclerosing Cholangitis 0.39735971
## Acute Viral Hepatitis 0.32025631
## Chronic Hepatitis B 0.28867513
## Chronic Hepatitis C 0.32025631
## Autoimmune 0.36037499
## Wilson's Disease 0.08851811
## Alpha-1 Antitrypsin 0.00000000
## Hemachromatosis 0.28867513
## Drug Induced Liver Injury or Toxin 0.15713484
## Budd Chiari 0.00000000
## Cryptogenic 0.04264158
## Malignancy 0.07849392
## Other 0.29088216
## Dialysis 0.19854168
## Pressers 0.37578690
## Mechanical Ventilation 0.07437488
tableone_skewed <- c("age", "meld_transplant")
tab1_2 <- print(tab1_1, nonnormal = tableone_skewed, formatOptions = list(big.mark = ","))## Stratified by any_infection
## 0
## n 82
## race (%)
## American Indian or Alaska Native 1 ( 1.2)
## Asian/Mideast Indian 6 ( 7.3)
## Black/African-American 9 ( 11.0)
## More than one Race 8 ( 9.8)
## Patient Declined 4 ( 4.9)
## Unknown 0 ( 0.0)
## White 54 ( 65.9)
## sex = Male (%) 44 ( 53.7)
## age (median [IQR]) 55.00 [39.50, 63.00]
## meld_transplant (median [IQR]) 28.00 [19.00, 33.00]
## Alcoholic Hepatitis = 1 (%) 6 ( 7.3)
## Alcoholic Cirrhosis = 1 (%) 40 ( 48.8)
## NAFLD/NASH = 1 (%) 10 ( 12.2)
## Primary Sclerosing Cholangitis = 1 (%) 6 ( 7.3)
## Acute Viral Hepatitis = 1 (%) 4 ( 4.9)
## Chronic Hepatitis B = 1 (%) 0 ( 0.0)
## Chronic Hepatitis C = 1 (%) 4 ( 4.9)
## Autoimmune = 1 (%) 5 ( 6.1)
## Wilson's Disease = 1 (%) 2 ( 2.4)
## Alpha-1 Antitrypsin = 0 (%) 82 (100.0)
## Hemachromatosis = 1 (%) 0 ( 0.0)
## Drug Induced Liver Injury or Toxin = 1 (%) 1 ( 1.2)
## Budd Chiari = 0 (%) 82 (100.0)
## Cryptogenic = 1 (%) 4 ( 4.9)
## Malignancy = 1 (%) 17 ( 20.7)
## Other = 1 (%) 8 ( 9.8)
## Dialysis = 1 (%) 22 ( 26.8)
## Pressers = 1 (%) 6 ( 7.3)
## Mechanical Ventilation = 1 (%) 5 ( 6.1)
## Stratified by any_infection
## 1 p
## n 25
## race (%) 0.307
## American Indian or Alaska Native 0 ( 0.0)
## Asian/Mideast Indian 2 ( 8.0)
## Black/African-American 2 ( 8.0)
## More than one Race 2 ( 8.0)
## Patient Declined 1 ( 4.0)
## Unknown 2 ( 8.0)
## White 16 ( 64.0)
## sex = Male (%) 16 ( 64.0) 0.495
## age (median [IQR]) 59.00 [46.00, 68.00] 0.177
## meld_transplant (median [IQR]) 30.00 [19.00, 33.00] 0.609
## Alcoholic Hepatitis = 1 (%) 2 ( 8.0) 1.000
## Alcoholic Cirrhosis = 1 (%) 8 ( 32.0) 0.212
## NAFLD/NASH = 1 (%) 6 ( 24.0) 0.259
## Primary Sclerosing Cholangitis = 1 (%) 0 ( 0.0) 0.370
## Acute Viral Hepatitis = 1 (%) 0 ( 0.0) 0.601
## Chronic Hepatitis B = 1 (%) 1 ( 4.0) 0.527
## Chronic Hepatitis C = 1 (%) 0 ( 0.0) 0.601
## Autoimmune = 1 (%) 0 ( 0.0) 0.469
## Wilson's Disease = 1 (%) 1 ( 4.0) 1.000
## Alpha-1 Antitrypsin = 0 (%) 25 (100.0) NA
## Hemachromatosis = 1 (%) 1 ( 4.0) 0.527
## Drug Induced Liver Injury or Toxin = 1 (%) 0 ( 0.0) 1.000
## Budd Chiari = 0 (%) 25 (100.0) NA
## Cryptogenic = 1 (%) 1 ( 4.0) 1.000
## Malignancy = 1 (%) 6 ( 24.0) 0.944
## Other = 1 (%) 5 ( 20.0) 0.306
## Dialysis = 1 (%) 9 ( 36.0) 0.527
## Pressers = 1 (%) 5 ( 20.0) 0.147
## Mechanical Ventilation = 1 (%) 2 ( 8.0) 1.000
## Stratified by any_infection
## test
## n
## race (%)
## American Indian or Alaska Native
## Asian/Mideast Indian
## Black/African-American
## More than one Race
## Patient Declined
## Unknown
## White
## sex = Male (%)
## age (median [IQR]) nonnorm
## meld_transplant (median [IQR]) nonnorm
## Alcoholic Hepatitis = 1 (%)
## Alcoholic Cirrhosis = 1 (%)
## NAFLD/NASH = 1 (%)
## Primary Sclerosing Cholangitis = 1 (%)
## Acute Viral Hepatitis = 1 (%)
## Chronic Hepatitis B = 1 (%)
## Chronic Hepatitis C = 1 (%)
## Autoimmune = 1 (%)
## Wilson's Disease = 1 (%)
## Alpha-1 Antitrypsin = 0 (%)
## Hemachromatosis = 1 (%)
## Drug Induced Liver Injury or Toxin = 1 (%)
## Budd Chiari = 0 (%)
## Cryptogenic = 1 (%)
## Malignancy = 1 (%)
## Other = 1 (%)
## Dialysis = 1 (%)
## Pressers = 1 (%)
## Mechanical Ventilation = 1 (%)
write.csv(tab1_2, "./Results/Demo_Table_1.csv", row.names = TRUE) # Saving then reading in the same data allows for an easy way to adjust p-values, since it loads the object as a dataframe
# Need to adjust pvalues and arrange properly....hence the
# multiple dataframes below
tab1_2_padjust1 <- read.csv("./Results/Demo_Table_1.csv") %>%
dplyr::rename(` ` = X, `No Infection` = X0, `Bacterial Infection` = X1)
tab1_2_padjust2 <- tab1_2_padjust1 %>%
mutate(` ` = factor(` `, levels = tab1_2_padjust1$` `))
tab1_2_padjust3 <- tab1_2_padjust1 %>%
mutate(test = ifelse(!is.na(p) & test == "", "chi.sq", test)) %>%
group_by(test) %>%
rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
ungroup() %>%
mutate(` ` = factor(` `, tab1_2_padjust2$` `)) %>%
arrange(` `) %>%
mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
"", p.adj))
# Read in csv to then append adjusted pvalues
write.csv(tab1_2_padjust3, "./Results/Demo_Table_1_padjust.csv",
row.names = FALSE)# Percentage of infections # where there is an infection
# within 30 days after transplant and not 0 days before
cult_percent_infx_all <- peri_criteria_all %>%
filter(bact_infection_present == "Yes", between(eday, 0,
30)) %>%
group_by(patientID, sampleID, eday) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(patientID, sampleID, bact_infection_present, infx_stool,
organism1, micro1.factor) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, sampleID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:micro1.factor), names_to = "organisms",
values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(sampleID, infx_stool, bact_infection_present, organisms) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(organisms != "No Bacterial Infection") %>%
group_by(patientID, bact_infection_present, organisms) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1) %>%
group_by(patientID, infx_stool, organisms) %>%
dplyr::slice(1) %>%
group_by(organisms) %>%
tally() %>%
ungroup() %>%
mutate(total_infections = sum(n)) %>%
replace_na(list(group = "unknown")) %>%
group_by(organisms) %>%
dplyr::summarize(percent = sum(n)/total_infections * 100,
count = sum(n))
# Location of infections # where there is an infection
# within 30 days after transplant and not 0 days before
cult_location_infx_all <- peri_criteria_all %>%
filter(bact_infection_present == "Yes", between(eday, 0,
30)) %>%
group_by(patientID, eday, key) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(patientID, bact_infection_present, infx_stool, organism1,
key) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:key), names_to = "organisms", values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(patientID, key, infx_stool, bact_infection_present) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1) %>%
filter(organisms != "No Bacterial Infection") %>%
group_by(patientID, infx_stool, key) %>%
dplyr::slice(1) %>%
group_by(key) %>%
tally() %>%
ungroup() %>%
mutate(total_infections = sum(n)) %>%
replace_na(list(group = "unknown")) %>%
group_by(key) %>%
dplyr::summarize(percent = sum(n)/total_infections * 100,
count = sum(n))
# Type of Infection # where there is an infection within 30
# days after transplant and not 0 days before
cult_type_infx_all <- peri_criteria_all %>%
filter(bact_infection_present == "Yes", grepl(variable, pattern = "anatomy.+"),
between(eday, 0, 30)) %>%
group_by(patientID, eday, diag_cat2) %>%
arrange(-infx_stool) %>%
dplyr::slice(1) %>%
ungroup() %>%
mutate(diag_cat3 = case_when(grepl("abdominal", diag_cat2,
ignore.case = T) ~ "Abdominal", grepl("vir|bronchitis|covid-19|cmv",
diag_cat2, ignore.case = T) ~ "Viral", grepl("bacteremia",
diag_cat2, ignore.case = T) ~ "Bacteremia", grepl("thrush",
diag_cat2, ignore.case = T) ~ "Fungal", grepl("cholangitis|empyema|panniculitis|peritonitis|latent tb",
diag_cat2, ignore.case = T) ~ "Bacterial", grepl("cystitis|pyelonephritis",
diag_cat2, ignore.case = T) ~ "Urinary Tract Infection",
grepl("pneumonia", diag_cat2, ignore.case = T) ~ "Pneumonia",
TRUE ~ diag_cat2), diag_cat3 = str_to_title(diag_cat3)) %>%
ungroup() %>%
select(patientID, bact_infection_present, infx_stool, organism1,
micro1.factor, key, diag_cat3) %>%
distinct() %>%
mutate(organism1 = gsub(x = organism1, pattern = "\\s+",
replacement = ""), organism1 = str_to_lower(string = organism1),
organism1 = ifelse(grepl(x = organism1, pattern = "enterococcus|enterobacterales|klebsiella|escherichia|citrobacter|proteus|staphyl|clostrid|pseudo|steno|bacteroides|helico"),
organism1, "Culture Negative")) %>%
group_by(patientID, infx_stool) %>%
mutate(`Enterococcus faecium` = grepl(x = organism1, pattern = "enterococcusfaecium",
ignore.case = T), `Enterococcus faecalis` = grepl(x = organism1,
pattern = "enterococcusfaecalis", ignore.case = T), `Enterococcus avium` = grepl(x = organism1,
pattern = "enterococcusavium", ignore.case = T), `Enterococcus gallinarum` = grepl(x = organism1,
pattern = "enterococcusgallinarum", ignore.case = T),
`Klebsiella pneumoniae` = grepl(x = organism1, pattern = "klebsiellapneumoniae",
ignore.case = T), `Enterobacter cloaceae` = grepl(x = organism1,
pattern = "enterobactercloaceae", ignore.case = T),
`Escherichia coli` = grepl(x = organism1, pattern = "escherichiacoli",
ignore.case = T), `Citrobacter freundii` = grepl(x = organism1,
pattern = "citrobacterfreundii", ignore.case = T),
`Proteus mirabilis` = grepl(x = organism1, pattern = "proteusmirabilis",
ignore.case = T), `Staphylococcus aureus` = grepl(x = organism1,
pattern = "staphylococcusaureus", ignore.case = T),
`Staphylococcus epidermis` = grepl(x = organism1, pattern = "staphylococcusepidermidis",
ignore.case = T), `Pseudomonas aeruginosa` = grepl(x = organism1,
pattern = "pseudomonasaeruginosa", ignore.case = T),
`Stenotrophmonas maltophilia` = grepl(x = organism1,
pattern = "stenotrophmonasmaltophilia", ignore.case = T),
`Helicobacter pylori` = grepl(x = organism1, pattern = "helicobacterpylori",
ignore.case = T), `Clostridium difficile` = grepl(x = organism1,
pattern = "clostridiumdifficile|clostridioidesdifficile",
ignore.case = T), `Bacteroides sp.` = grepl(x = organism1,
pattern = "bacteroides", ignore.case = T), `Culture Negative` = grepl(x = organism1,
pattern = "Culture Negative", ignore.case = T)) %>%
pivot_longer(-c(patientID:diag_cat3), names_to = "organisms",
values_to = "org_presence") %>%
mutate(organisms = ifelse(bact_infection_present == "No",
"No Bacterial Infection", organisms), org_presence = ifelse(org_presence ==
TRUE, 1, 0)) %>%
group_by(patientID, diag_cat3, infx_stool, bact_infection_present) %>%
dplyr::slice_max(org_presence) %>%
ungroup() %>%
filter(org_presence == 1, organisms != "No Bacterial Infection") %>%
ungroup() %>%
group_by(patientID, infx_stool, diag_cat3) %>%
dplyr::slice(1) %>%
group_by(diag_cat3) %>%
tally() %>%
ungroup() %>%
mutate(total_infections = sum(n)) %>%
group_by(diag_cat3) %>%
dplyr::summarize(percent = sum(n)/total_infections * 100,
count = sum(n))
culture_tot <- bind_rows(cult_percent_infx_all, cult_location_infx_all,
cult_type_infx_all) %>%
mutate(percent = round(percent, 2))
# Read in csv to then append adjusted pvalues
write.csv(culture_tot, "./Results/Supplemental_Table_3.csv",
row.names = FALSE)## Vector of variables to summarize
abx_vars <- c("Basiliximab", "Mycophenolate", "Steroid", "Systemic Vancomycin",
"Tacrolimus", "Cefepime", "Metronidazole", "Piperacillin/Tazobactam",
"Rifaximin", "Ceftriaxone", "Ciprofloxacin", "Gentamicin",
"Tobramicin", "Daptomycin", "Meropenem", "Oral Vancomycin")
abx2 <- abx %>%
filter(grepl(pattern = "basilix|tacro|steroid|mycophenolate|lactulose",
medication_name, ignore.case = T) | grepl(pattern = "GLUCOCORTICOIDS|steroid",
med_pharm_class, ignore.case = T) | grepl(pattern = "steroid",
med_pharm_sub_class, ignore.case = T) | grepl("given",
mar_action, ignore.case = T)) %>%
mutate(Immunosuppressants = case_when(grepl("basilix", medication_name,
ignore.case = T) ~ "Basiliximab", grepl("tacro", medication_name,
ignore.case = T) ~ "Tacrolimus", grepl("one|ide|solu-cortef",
medication_name, ignore.case = T) ~ "Steroid", grepl("mycophenolate",
medication_name, ignore.case = T) ~ "Mycophenolate",
grepl("lactulose", medication_name, ignore.case = T) ~
"Lactulose"), Antibiotics = case_when(grepl("rifaximin",
medication_name, ignore.case = T) ~ "Rifaximin", grepl("lactulose",
medication_name, ignore.case = T) ~ "Lactulose", grepl("ceftriaxone",
medication_name, ignore.case = T) ~ "Ceftriaxone", grepl("piperacillin|tazobactam",
medication_name, ignore.case = T) ~ "Piperacillin/Tazobactam",
grepl("cefepime", medication_name, ignore.case = T) ~
"Cefepime", grepl("meropenem", medication_name, ignore.case = T) ~
"Meropenem", grepl("gentamicin", medication_name,
ignore.case = T) ~ "Gentamicin", grepl("tobramycin",
medication_name, ignore.case = T) ~ "Tobramicin",
grepl("vancomycin.+oral", medication_name, ignore.case = T) ~
"Oral Vancomycin", grepl("vancomycin.+(IV|Intravenous)",
medication_name, ignore.case = T) ~ "Systemic Vancomycin",
grepl("METRONIDAZOLE", medication_name, ignore.case = T) ~
"Metronidazole", grepl("DAPTOMYCIN", medication_name,
ignore.case = T) ~ "Daptomycin", grepl("linezolid",
medication_name, ignore.case = T) ~ "Linezolid",
grepl("fluconazole", medication_name, ignore.case = T) ~
"Fluconazole", grepl("micafungin", medication_name,
ignore.case = T) ~ "Micafungin", grepl("cipro", medication_name,
ignore.case = T) & !grepl("drop", dose_units, ignore.case = T) ~
"Ciprofloxacin"), action = case_when(!is.na(Immunosuppressants) &
between(days_transplant, 0, 30) | !is.na(Immunosuppressants) &
ordering_mode == "Outpatient" ~ "keep", !is.na(Antibiotics) &
between(days_transplant, -7, 1) ~ "keep", TRUE ~ "remove")) %>%
group_by(patientID, Immunosuppressants, Antibiotics) %>%
arrange(days_transplant) %>%
filter(action == "keep") %>%
dplyr::slice(1) %>%
select(patientID, Immunosuppressants, Antibiotics) %>%
left_join(peri_matrix_all %>%
select(patientID, any_infection)) %>%
pivot_longer(!c(patientID, any_infection), names_to = "variable",
values_to = "value") %>%
drop_na(value) %>%
mutate(variable = 1) %>%
pivot_wider(c(patientID, any_infection), names_from = "value",
values_from = "variable", values_fn = min) %>%
replace(is.na(.), 0)
abx_tab1_1 <- CreateTableOne(vars = abx_vars, testNonNormal = "wilcox.test",
includeNA = FALSE, factorVars = abx_vars, strata = "any_infection",
data = abx2)
summary(abx_tab1_1)##
## ### Summary of categorical variables ###
##
## any_infection: 0
## var n miss p.miss level freq percent cum.percent
## Basiliximab 82 0 0.0 0 28 34.1 34.1
## 1 54 65.9 100.0
##
## Mycophenolate 82 0 0.0 0 10 12.2 12.2
## 1 72 87.8 100.0
##
## Steroid 82 0 0.0 1 82 100.0 100.0
##
## Systemic Vancomycin 82 0 0.0 0 40 48.8 48.8
## 1 42 51.2 100.0
##
## Tacrolimus 82 0 0.0 1 82 100.0 100.0
##
## Cefepime 82 0 0.0 0 56 68.3 68.3
## 1 26 31.7 100.0
##
## Metronidazole 82 0 0.0 0 48 58.5 58.5
## 1 34 41.5 100.0
##
## Piperacillin/Tazobactam 82 0 0.0 0 12 14.6 14.6
## 1 70 85.4 100.0
##
## Rifaximin 82 0 0.0 0 45 54.9 54.9
## 1 37 45.1 100.0
##
## Ceftriaxone 82 0 0.0 0 66 80.5 80.5
## 1 16 19.5 100.0
##
## Ciprofloxacin 82 0 0.0 0 67 81.7 81.7
## 1 15 18.3 100.0
##
## Gentamicin 82 0 0.0 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## Tobramicin 82 0 0.0 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## Daptomycin 82 0 0.0 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## Meropenem 82 0 0.0 0 78 95.1 95.1
## 1 4 4.9 100.0
##
## Oral Vancomycin 82 0 0.0 0 81 98.8 98.8
## 1 1 1.2 100.0
##
## ------------------------------------------------------------
## any_infection: 1
## var n miss p.miss level freq percent cum.percent
## Basiliximab 25 0 0.0 0 7 28.0 28.0
## 1 18 72.0 100.0
##
## Mycophenolate 25 0 0.0 0 1 4.0 4.0
## 1 24 96.0 100.0
##
## Steroid 25 0 0.0 1 25 100.0 100.0
##
## Systemic Vancomycin 25 0 0.0 0 8 32.0 32.0
## 1 17 68.0 100.0
##
## Tacrolimus 25 0 0.0 1 25 100.0 100.0
##
## Cefepime 25 0 0.0 0 14 56.0 56.0
## 1 11 44.0 100.0
##
## Metronidazole 25 0 0.0 0 13 52.0 52.0
## 1 12 48.0 100.0
##
## Piperacillin/Tazobactam 25 0 0.0 0 2 8.0 8.0
## 1 23 92.0 100.0
##
## Rifaximin 25 0 0.0 0 13 52.0 52.0
## 1 12 48.0 100.0
##
## Ceftriaxone 25 0 0.0 0 18 72.0 72.0
## 1 7 28.0 100.0
##
## Ciprofloxacin 25 0 0.0 0 21 84.0 84.0
## 1 4 16.0 100.0
##
## Gentamicin 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## Tobramicin 25 0 0.0 0 22 88.0 88.0
## 1 3 12.0 100.0
##
## Daptomycin 25 0 0.0 0 22 88.0 88.0
## 1 3 12.0 100.0
##
## Meropenem 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
## Oral Vancomycin 25 0 0.0 0 24 96.0 96.0
## 1 1 4.0 100.0
##
##
## p-values
## pApprox pExact
## Basiliximab 0.74143512 0.63341955
## Mycophenolate 0.42082759 0.45169519
## Steroid NA NA
## Systemic Vancomycin 0.21234695 0.17136371
## Tacrolimus NA NA
## Cefepime 0.37287647 0.33702201
## Metronidazole 0.72844870 0.64658181
## Piperacillin/Tazobactam 0.60142514 0.51269562
## Rifaximin 0.98119534 0.82233980
## Ceftriaxone 0.53110302 0.40817712
## Ciprofloxacin 1.00000000 1.00000000
## Gentamicin 0.95599591 0.41438900
## Tobramicin 0.42443880 0.35037337
## Daptomycin 0.05938894 0.03899733
## Meropenem 1.00000000 1.00000000
## Oral Vancomycin 0.95599591 0.41438900
##
## Standardize mean differences
## 1 vs 2
## Basiliximab 0.13310348
## Mycophenolate 0.30385759
## Steroid 0.00000000
## Systemic Vancomycin 0.34709743
## Tacrolimus 0.00000000
## Cefepime 0.25550557
## Metronidazole 0.13174842
## Piperacillin/Tazobactam 0.21056770
## Rifaximin 0.05772164
## Ceftriaxone 0.20043582
## Ciprofloxacin 0.06085600
## Gentamicin 0.17507370
## Tobramicin 0.25833951
## Daptomycin 0.44449214
## Meropenem 0.04264158
## Oral Vancomycin 0.17507370
abx_tab1_2 <- print(abx_tab1_1, formatOptions = list(big.mark = ","))## Stratified by any_infection
## 0 1 p test
## n 82 25
## Basiliximab = 1 (%) 54 ( 65.9) 18 ( 72.0) 0.741
## Mycophenolate = 1 (%) 72 ( 87.8) 24 ( 96.0) 0.421
## Steroid = 1 (%) 82 (100.0) 25 (100.0) NA
## Systemic Vancomycin = 1 (%) 42 ( 51.2) 17 ( 68.0) 0.212
## Tacrolimus = 1 (%) 82 (100.0) 25 (100.0) NA
## Cefepime = 1 (%) 26 ( 31.7) 11 ( 44.0) 0.373
## Metronidazole = 1 (%) 34 ( 41.5) 12 ( 48.0) 0.728
## Piperacillin/Tazobactam = 1 (%) 70 ( 85.4) 23 ( 92.0) 0.601
## Rifaximin = 1 (%) 37 ( 45.1) 12 ( 48.0) 0.981
## Ceftriaxone = 1 (%) 16 ( 19.5) 7 ( 28.0) 0.531
## Ciprofloxacin = 1 (%) 15 ( 18.3) 4 ( 16.0) 1.000
## Gentamicin = 1 (%) 1 ( 1.2) 1 ( 4.0) 0.956
## Tobramicin = 1 (%) 4 ( 4.9) 3 ( 12.0) 0.424
## Daptomycin = 1 (%) 1 ( 1.2) 3 ( 12.0) 0.059
## Meropenem = 1 (%) 4 ( 4.9) 1 ( 4.0) 1.000
## Oral Vancomycin = 1 (%) 1 ( 1.2) 1 ( 4.0) 0.956
write.csv(abx_tab1_2, "./Results/ABX_Table_1.csv", row.names = TRUE) # Saving then reading in the same data allows for an easy way to adjust p-values, since it loads the object as a dataframe
# Need to adjust pvalues and arrange properly....hence the
# multiple dataframes below
abx_tab1_2_padjust1 <- read.csv("./Results/ABX_Table_1.csv") %>%
dplyr::rename(` ` = X, `No Infection` = X0, `Bacterial Infection` = X1)
abx_tab1_2_padjust2 <- abx_tab1_2_padjust1 %>%
mutate(` ` = factor(` `, levels = abx_tab1_2_padjust1$` `))
abx_tab1_2_padjust3 <- abx_tab1_2_padjust1 %>%
mutate(test = ifelse(!is.na(p) & is.na(test), "chi.sq", "")) %>%
group_by(test) %>%
rstatix::adjust_pvalue(p.col = "p", method = "BH") %>%
ungroup() %>%
mutate(` ` = factor(` `, abx_tab1_2_padjust2$` `)) %>%
arrange(` `) %>%
mutate(p = ifelse(is.na(p), "", p), p.adj = ifelse(is.na(p.adj),
"", p.adj))
# Read in csv to then append adjusted pvalues
write.csv(abx_tab1_2_padjust3, "./Results/ABX_Table_1_padjust.csv",
row.names = FALSE)sample_days <- metaphlan_df_sumry %>%
group_by(sampleID) %>%
slice(1) %>%
filter(db == "Liver Transplant") %>%
select(sampleID, diversity_group) %>%
left_join(sample_lookup %>%
select(sampleID, sample_days_from_transplant)) %>%
ungroup()
# Quartiles of samples
clin_quartiles <- sample_days %>%
select(sample_days_from_transplant) %>%
summarise(minimum = min(sample_days_from_transplant, na.rm = T),
lower = unname(quantile(sample_days_from_transplant,
probs = 0.25, na.rm = T)), median = unname(quantile(sample_days_from_transplant,
probs = 0.5, na.rm = T)), upper = unname(quantile(sample_days_from_transplant,
probs = 0.75, na.rm = T)), maximum = max(sample_days_from_transplant,
na.rm = T)) %>%
mutate(placeholder = 1) %>%
pivot_longer(!placeholder, names_to = "ranges", values_to = "value")
sample_days_histogram <- sample_days %>%
ggplot(., aes(x = sample_days_from_transplant)) + geom_histogram(color = "black",
fill = "grey", binwidth = 1) + geom_vline(aes(xintercept = 0),
color = "orangered", linetype = "solid", size = 1.2) + geom_vline(aes(xintercept = median(sample_days_from_transplant,
na.rm = TRUE)), color = "cyan2", linetype = "dashed", size = 1.2) +
geom_text(aes(x = median(sample_days_from_transplant, na.rm = TRUE),
y = 12.5), color = "cyan2", label = paste0("Median = ",
clin_quartiles %>%
filter(ranges == "median") %>%
select(value), " days"), nudge_x = 4) + geom_text(label = "Transplant Date = Day 0",
x = -4, y = 12.5, color = "orangered") + theme_bw() + theme(panel.grid = eb(),
axis.text = et(color = "black", size = 12), axis.title = et(color = "black",
size = 14), legend.position = "none") + scale_x_continuous(breaks = seq(-7,
30, 5), limits = c(-7, 35)) + ylab("Number of Samples\n")
sample_days_boxplot <- sample_days %>%
ggplot(., aes(x = sample_days_from_transplant, y = diversity_group,
fill = diversity_group)) + geom_vline(aes(xintercept = 0),
color = "orangered", linetype = "solid", size = 1.2) + geom_vline(aes(xintercept = median(sample_days_from_transplant,
na.rm = TRUE)), color = "cyan2", linetype = "dashed", size = 1.2) +
geom_violin() + geom_boxplot(alpha = 0.5, width = 0.15, outlier.shape = NA,
fill = "white", color = "white") + stat_compare_means(comparisons = list(c("High Diversity",
"Medium Diversity"), c("High Diversity", "Low Diversity"),
c("Medium Diversity", "Low Diversity")), bracket.size = 0.3,
step.increase = 0.07) + theme_bw() + theme(panel.grid = eb(),
axis.text.x = et(color = "black", size = 12), axis.text.y = et(color = "black",
size = 12), axis.ticks.y = eb(), axis.title = et(color = "black",
size = 14), legend.position = "none") + scale_x_continuous(breaks = seq(-7,
30, 5), limits = c(-7, 35)) + scale_fill_manual(values = c("#3A001E",
"#8A0246", "#C20463")) + ylab("Diversity Groups\n") + xlab("\nDays to Sample")
pdf(file = "./Results/Supplemental_Figure_1.pdf", height = 10,
width = 12, onefile = F)
gg.stack(sample_days_histogram, sample_days_boxplot)
dev.off()## quartz_off_screen
## 2
gg.stack(sample_days_histogram, sample_days_boxplot)# Percent of samples with > 90% of a single taxon
metaphlan_df2 %>%
left_join(metaphlan_df_sumry %>%
select(sampleID, diversity_group)) %>%
filter(grepl(x = sampleID, pattern = "^lt")) %>%
group_by(sampleID) %>%
filter(pctseqs > 0.9) %>%
select(-y.text, -db) %>%
write.csv(., "./Results/90percent_taxon.csv", row.names = FALSE)save.image(file = "./Data/LT_Modeling.RData")